From 6ff3b19ee6120edf015fad8caab2991faa3070af Mon Sep 17 00:00:00 2001
From: Anthony Barbier
Date: Mon, 4 Sep 2017 18:44:23 +0100
Subject: COMPMID-344 Updated doxygen

Change-Id: I32f7b84daa560e460b77216add529c8fa8b327ae
---
 arm_compute/core/AccessWindowAutoPadding.h | 76 ++ arm_compute/core/AccessWindowStatic.h | 92 ++ arm_compute/core/AccessWindowTranspose.h | 48 + arm_compute/core/CL/CLHelpers.h | 105 ++ arm_compute/core/CL/CLKernelLibrary.h | 248 +++++ arm_compute/core/CL/CLKernels.h | 90 ++ arm_compute/core/CL/CLTypes.h | 41 + arm_compute/core/CL/ICLArray.h | 118 +++ arm_compute/core/CL/ICLDistribution1D.h | 102 ++ arm_compute/core/CL/ICLHOG.h | 113 +++ arm_compute/core/CL/ICLKernel.h | 157 +++ arm_compute/core/CL/ICLLut.h | 94 ++ arm_compute/core/CL/ICLMultiHOG.h | 56 ++ arm_compute/core/CL/ICLMultiImage.h | 58 ++ arm_compute/core/CL/ICLSimple2DKernel.h | 41 + arm_compute/core/CL/ICLSimple3DKernel.h | 43 + arm_compute/core/CL/ICLSimpleKernel.h | 66 ++ arm_compute/core/CL/ICLTensor.h | 106 ++ arm_compute/core/CL/OpenCL.h | 43 + .../core/CL/kernels/CLAbsoluteDifferenceKernel.h | 71 ++ arm_compute/core/CL/kernels/CLAccumulateKernel.h | 91 ++ .../core/CL/kernels/CLActivationLayerKernel.h | 46 + .../core/CL/kernels/CLArithmeticAdditionKernel.h | 72 ++ .../CL/kernels/CLArithmeticSubtractionKernel.h | 74 ++ .../CL/kernels/CLBatchNormalizationLayerKernel.h | 77 ++ arm_compute/core/CL/kernels/CLBitwiseAndKernel.h | 68 ++ arm_compute/core/CL/kernels/CLBitwiseNotKernel.h | 49 + arm_compute/core/CL/kernels/CLBitwiseOrKernel.h | 68 ++ arm_compute/core/CL/kernels/CLBitwiseXorKernel.h | 68 ++ arm_compute/core/CL/kernels/CLBox3x3Kernel.h | 51 + arm_compute/core/CL/kernels/CLCannyEdgeKernel.h | 147 +++ .../core/CL/kernels/CLChannelCombineKernel.h | 83 ++ .../core/CL/kernels/CLChannelExtractKernel.h | 79 ++ arm_compute/core/CL/kernels/CLCol2ImKernel.h | 86 ++ arm_compute/core/CL/kernels/CLColorConvertKernel.h | 90 ++ arm_compute/core/CL/kernels/CLConvolutionKernel.h | 182 ++++ .../core/CL/kernels/CLDepthConcatenateKernel.h | 76 ++ arm_compute/core/CL/kernels/CLDepthConvertKernel.h | 61 ++ arm_compute/core/CL/kernels/CLDerivativeKernel.h | 72 ++ arm_compute/core/CL/kernels/CLDilateKernel.h | 51 + arm_compute/core/CL/kernels/CLErodeKernel.h | 51 + arm_compute/core/CL/kernels/CLFastCornersKernel.h | 114 +++ arm_compute/core/CL/kernels/CLFillBorderKernel.h | 77 ++ .../core/CL/kernels/CLGEMMInterleave4x4Kernel.h | 80 ++ .../CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h | 81 ++ .../kernels/CLGEMMMatrixAccumulateBiasesKernel.h | 63 ++ .../core/CL/kernels/CLGEMMMatrixAdditionKernel.h | 70 ++ .../core/CL/kernels/CLGEMMMatrixMultiplyKernel.h | 73 ++ .../core/CL/kernels/CLGEMMTranspose1xWKernel.h | 84 ++ arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h | 51 + arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h | 67 ++ .../core/CL/kernels/CLGaussianPyramidKernel.h | 100 ++ .../core/CL/kernels/CLHOGDescriptorKernel.h | 105 ++ arm_compute/core/CL/kernels/CLHOGDetectorKernel.h | 82 ++ .../core/CL/kernels/CLHarrisCornersKernel.h | 85 ++ arm_compute/core/CL/kernels/CLHistogramKernel.h | 98 ++ arm_compute/core/CL/kernels/CLIm2ColKernel.h | 111 +++ .../core/CL/kernels/CLIntegralImageKernel.h | 73 ++ arm_compute/core/CL/kernels/CLLKTrackerKernel.h | 183 ++++ .../CLLocallyConnectedMatrixMultiplyKernel.h | 68 ++ .../core/CL/kernels/CLMagnitudePhaseKernel.h | 77 ++ arm_compute/core/CL/kernels/CLMeanStdDevKernel.h | 74 ++ arm_compute/core/CL/kernels/CLMedian3x3Kernel.h | 51 + .../core/CL/kernels/CLMinMaxLocationKernel.h | 104 ++
.../core/CL/kernels/CLNonLinearFilterKernel.h | 63 ++ .../CL/kernels/CLNonMaximaSuppression3x3Kernel.h | 52 + .../core/CL/kernels/CLNormalizationLayerKernel.h | 71 ++ .../CL/kernels/CLPixelWiseMultiplicationKernel.h | 73 ++ arm_compute/core/CL/kernels/CLPoolingLayerKernel.h | 69 ++ arm_compute/core/CL/kernels/CLRemapKernel.h | 70 ++ arm_compute/core/CL/kernels/CLScaleKernel.h | 55 ++ arm_compute/core/CL/kernels/CLScharr3x3Kernel.h | 86 ++ arm_compute/core/CL/kernels/CLSobel3x3Kernel.h | 72 ++ arm_compute/core/CL/kernels/CLSobel5x5Kernel.h | 116 +++ arm_compute/core/CL/kernels/CLSobel7x7Kernel.h | 116 +++ arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h | 109 +++ arm_compute/core/CL/kernels/CLTableLookupKernel.h | 47 + arm_compute/core/CL/kernels/CLThresholdKernel.h | 56 ++ arm_compute/core/CL/kernels/CLTransposeKernel.h | 49 + arm_compute/core/CL/kernels/CLWarpAffineKernel.h | 51 + .../core/CL/kernels/CLWarpPerspectiveKernel.h | 51 + .../core/CL/kernels/CLWeightsReshapeKernel.h | 114 +++ arm_compute/core/CPP/CPPKernels.h | 32 + arm_compute/core/CPP/ICPPKernel.h | 53 + arm_compute/core/CPP/ICPPSimpleKernel.h | 66 ++ .../core/CPP/kernels/CPPCornerCandidatesKernel.h | 74 ++ .../CPPDetectionWindowNonMaximaSuppressionKernel.h | 72 ++ .../CPP/kernels/CPPSortEuclideanDistanceKernel.h | 70 ++ arm_compute/core/Coordinates.h | 61 ++ arm_compute/core/Dimensions.h | 178 ++++ arm_compute/core/Error.h | 160 +++ arm_compute/core/FixedPoint.h | 217 +++++ arm_compute/core/FixedPoint.inl | 252 +++++ arm_compute/core/HOGInfo.h | 146 +++ arm_compute/core/Helpers.h | 507 ++++++++++ arm_compute/core/Helpers.inl | 306 ++++++ arm_compute/core/IAccessWindow.h | 225 +++++ arm_compute/core/IArray.h | 149 +++ arm_compute/core/IDistribution.h | 59 ++ arm_compute/core/IDistribution1D.h | 84 ++ arm_compute/core/IHOG.h | 54 ++ arm_compute/core/IKernel.h | 72 ++ arm_compute/core/ILut.h | 69 ++ arm_compute/core/IMultiHOG.h | 61 ++ arm_compute/core/IMultiImage.h | 60 ++ arm_compute/core/IPyramid.h | 56 ++ arm_compute/core/ITensor.h | 90 ++ arm_compute/core/ITensorInfo.h | 195 ++++ arm_compute/core/MultiImageInfo.h | 66 ++ arm_compute/core/NEON/INEKernel.h | 33 + arm_compute/core/NEON/INESimpleKernel.h | 33 + arm_compute/core/NEON/NEColorConvertHelper.inl | 888 +++++++++++++++++ arm_compute/core/NEON/NEFixedPoint.h | 686 +++++++++++++ arm_compute/core/NEON/NEFixedPoint.inl | 1018 ++++++++++++++++++++ arm_compute/core/NEON/NEKernels.h | 96 ++ arm_compute/core/NEON/NEMath.h | 96 ++ arm_compute/core/NEON/NEMath.inl | 141 +++ .../core/NEON/kernels/NEAbsoluteDifferenceKernel.h | 82 ++ arm_compute/core/NEON/kernels/NEAccumulateKernel.h | 122 +++ .../core/NEON/kernels/NEActivationLayerKernel.h | 84 ++ .../core/NEON/kernels/NEArithmeticAdditionKernel.h | 79 ++ .../NEON/kernels/NEArithmeticSubtractionKernel.h | 79 ++ .../NEON/kernels/NEBatchNormalizationLayerKernel.h | 78 ++ arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h | 68 ++ arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h | 66 ++ arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h | 68 ++ arm_compute/core/NEON/kernels/NEBitwiseXorKernel.h | 68 ++ arm_compute/core/NEON/kernels/NEBox3x3Kernel.h | 62 ++ arm_compute/core/NEON/kernels/NECannyEdgeKernel.h | 190 ++++ .../core/NEON/kernels/NEChannelCombineKernel.h | 125 +++ .../core/NEON/kernels/NEChannelExtractKernel.h | 109 +++ arm_compute/core/NEON/kernels/NECol2ImKernel.h | 100 ++ .../core/NEON/kernels/NEColorConvertKernel.h | 88 ++ .../core/NEON/kernels/NEConvolutionKernel.h | 251 +++++ 
.../NEON/kernels/NECumulativeDistributionKernel.h | 80 ++ .../core/NEON/kernels/NEDepthConcatenateKernel.h | 76 ++ .../core/NEON/kernels/NEDepthConvertKernel.h | 68 ++ arm_compute/core/NEON/kernels/NEDerivativeKernel.h | 94 ++ arm_compute/core/NEON/kernels/NEDilateKernel.h | 49 + .../NEDirectConvolutionLayerBiasAccumulateKernel.h | 74 ++ .../NEON/kernels/NEDirectConvolutionLayerKernel.h | 76 ++ arm_compute/core/NEON/kernels/NEErodeKernel.h | 49 + .../core/NEON/kernels/NEFastCornersKernel.h | 72 ++ arm_compute/core/NEON/kernels/NEFillArrayKernel.h | 73 ++ arm_compute/core/NEON/kernels/NEFillBorderKernel.h | 79 ++ .../core/NEON/kernels/NEFillInnerBorderKernel.h | 75 ++ .../core/NEON/kernels/NEGEMMInterleave4x4Kernel.h | 79 ++ .../NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h | 88 ++ .../kernels/NEGEMMMatrixAccumulateBiasesKernel.h | 63 ++ .../core/NEON/kernels/NEGEMMMatrixAdditionKernel.h | 81 ++ .../core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h | 75 ++ .../core/NEON/kernels/NEGEMMTranspose1xWKernel.h | 82 ++ .../core/NEON/kernels/NEGaussian3x3Kernel.h | 50 + .../core/NEON/kernels/NEGaussian5x5Kernel.h | 73 ++ .../core/NEON/kernels/NEGaussianPyramidKernel.h | 100 ++ .../core/NEON/kernels/NEHOGDescriptorKernel.h | 141 +++ .../core/NEON/kernels/NEHOGDetectorKernel.h | 87 ++ .../core/NEON/kernels/NEHarrisCornersKernel.h | 126 +++ arm_compute/core/NEON/kernels/NEHistogramKernel.h | 129 +++ arm_compute/core/NEON/kernels/NEIm2ColKernel.h | 114 +++ .../core/NEON/kernels/NEIntegralImageKernel.h | 50 + arm_compute/core/NEON/kernels/NELKTrackerKernel.h | 144 +++ .../NELocallyConnectedMatrixMultiplyKernel.h | 64 ++ .../core/NEON/kernels/NEMagnitudePhaseKernel.h | 164 ++++ arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h | 76 ++ arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h | 50 + .../core/NEON/kernels/NEMinMaxLocationKernel.h | 161 ++++ .../core/NEON/kernels/NENonLinearFilterKernel.h | 147 +++ .../NEON/kernels/NENonMaximaSuppression3x3Kernel.h | 99 ++ .../core/NEON/kernels/NENormalizationLayerKernel.h | 106 ++ .../NEON/kernels/NEPixelWiseMultiplicationKernel.h | 105 ++ .../core/NEON/kernels/NEPoolingLayerKernel.h | 106 ++ arm_compute/core/NEON/kernels/NERemapKernel.h | 78 ++ arm_compute/core/NEON/kernels/NEScaleKernel.h | 89 ++ arm_compute/core/NEON/kernels/NEScharr3x3Kernel.h | 82 ++ arm_compute/core/NEON/kernels/NESobel3x3Kernel.h | 82 ++ arm_compute/core/NEON/kernels/NESobel5x5Kernel.h | 118 +++ arm_compute/core/NEON/kernels/NESobel7x7Kernel.h | 122 +++ .../core/NEON/kernels/NESoftmaxLayerKernel.h | 135 +++ .../core/NEON/kernels/NETableLookupKernel.h | 76 ++ arm_compute/core/NEON/kernels/NEThresholdKernel.h | 81 ++ arm_compute/core/NEON/kernels/NETransposeKernel.h | 78 ++ arm_compute/core/NEON/kernels/NEWarpKernel.h | 117 +++ .../core/NEON/kernels/NEWeightsReshapeKernel.h | 94 ++ arm_compute/core/PixelValue.h | 168 ++++ arm_compute/core/PyramidInfo.h | 131 +++ arm_compute/core/Size2D.h | 84 ++ arm_compute/core/Steps.h | 66 ++ arm_compute/core/Strides.h | 62 ++ arm_compute/core/SubTensorInfo.h | 184 ++++ arm_compute/core/TensorInfo.h | 300 ++++++ arm_compute/core/TensorShape.h | 141 +++ arm_compute/core/Types.h | 636 ++++++++++++ arm_compute/core/Utils.h | 740 ++++++++++++++ arm_compute/core/Validate.h | 563 +++++++++++ arm_compute/core/Window.h | 355 +++++++ arm_compute/core/Window.inl | 182 ++++ arm_compute/runtime/Array.h | 75 ++ arm_compute/runtime/CL/CLArray.h | 108 +++ arm_compute/runtime/CL/CLDistribution1D.h | 79 ++ arm_compute/runtime/CL/CLFunctions.h | 94 ++ 
arm_compute/runtime/CL/CLHOG.h | 80 ++ arm_compute/runtime/CL/CLLut.h | 89 ++ arm_compute/runtime/CL/CLLutAllocator.h | 88 ++ arm_compute/runtime/CL/CLMultiHOG.h | 56 ++ arm_compute/runtime/CL/CLMultiImage.h | 87 ++ arm_compute/runtime/CL/CLPyramid.h | 82 ++ arm_compute/runtime/CL/CLScheduler.h | 158 +++ arm_compute/runtime/CL/CLSubTensor.h | 99 ++ arm_compute/runtime/CL/CLTensor.h | 81 ++ arm_compute/runtime/CL/CLTensorAllocator.h | 103 ++ arm_compute/runtime/CL/ICLSimpleFunction.h | 50 + .../runtime/CL/functions/CLAbsoluteDifference.h | 50 + arm_compute/runtime/CL/functions/CLAccumulate.h | 73 ++ .../runtime/CL/functions/CLActivationLayer.h | 51 + .../runtime/CL/functions/CLArithmeticAddition.h | 52 + .../runtime/CL/functions/CLArithmeticSubtraction.h | 53 + .../CL/functions/CLBatchNormalizationLayer.h | 67 ++ arm_compute/runtime/CL/functions/CLBitwiseAnd.h | 50 + arm_compute/runtime/CL/functions/CLBitwiseNot.h | 49 + arm_compute/runtime/CL/functions/CLBitwiseOr.h | 50 + arm_compute/runtime/CL/functions/CLBitwiseXor.h | 50 + arm_compute/runtime/CL/functions/CLBox3x3.h | 55 ++ arm_compute/runtime/CL/functions/CLCannyEdge.h | 85 ++ .../runtime/CL/functions/CLChannelCombine.h | 58 ++ .../runtime/CL/functions/CLChannelExtract.h | 56 ++ arm_compute/runtime/CL/functions/CLColorConvert.h | 68 ++ arm_compute/runtime/CL/functions/CLConvolution.h | 128 +++ .../runtime/CL/functions/CLConvolutionLayer.h | 121 +++ .../runtime/CL/functions/CLDepthConcatenate.h | 69 ++ arm_compute/runtime/CL/functions/CLDepthConvert.h | 60 ++ arm_compute/runtime/CL/functions/CLDerivative.h | 59 ++ arm_compute/runtime/CL/functions/CLDilate.h | 55 ++ .../runtime/CL/functions/CLEqualizeHistogram.h | 72 ++ arm_compute/runtime/CL/functions/CLErode.h | 55 ++ arm_compute/runtime/CL/functions/CLFastCorners.h | 88 ++ arm_compute/runtime/CL/functions/CLFillBorder.h | 49 + .../runtime/CL/functions/CLFullyConnectedLayer.h | 120 +++ arm_compute/runtime/CL/functions/CLGEMM.h | 84 ++ .../runtime/CL/functions/CLGEMMInterleave4x4.h | 50 + arm_compute/runtime/CL/functions/CLGEMMLowp.h | 85 ++ arm_compute/runtime/CL/functions/CLGaussian3x3.h | 55 ++ arm_compute/runtime/CL/functions/CLGaussian5x5.h | 70 ++ .../runtime/CL/functions/CLGaussianPyramid.h | 119 +++ arm_compute/runtime/CL/functions/CLHOGDescriptor.h | 72 ++ arm_compute/runtime/CL/functions/CLHOGDetector.h | 78 ++ arm_compute/runtime/CL/functions/CLHOGGradient.h | 72 ++ .../runtime/CL/functions/CLHOGMultiDetection.h | 105 ++ arm_compute/runtime/CL/functions/CLHarrisCorners.h | 104 ++ arm_compute/runtime/CL/functions/CLHistogram.h | 68 ++ arm_compute/runtime/CL/functions/CLIntegralImage.h | 60 ++ .../runtime/CL/functions/CLLaplacianPyramid.h | 85 ++ .../runtime/CL/functions/CLLaplacianReconstruct.h | 91 ++ .../runtime/CL/functions/CLLocallyConnectedLayer.h | 79 ++ arm_compute/runtime/CL/functions/CLMagnitude.h | 48 + arm_compute/runtime/CL/functions/CLMeanStdDev.h | 56 ++ arm_compute/runtime/CL/functions/CLMedian3x3.h | 55 ++ .../runtime/CL/functions/CLMinMaxLocation.h | 86 ++ .../runtime/CL/functions/CLNonLinearFilter.h | 61 ++ .../CL/functions/CLNonMaximaSuppression3x3.h | 55 ++ .../runtime/CL/functions/CLNormalizationLayer.h | 71 ++ arm_compute/runtime/CL/functions/CLOpticalFlow.h | 111 +++ arm_compute/runtime/CL/functions/CLPhase.h | 48 + .../CL/functions/CLPixelWiseMultiplication.h | 51 + arm_compute/runtime/CL/functions/CLPoolingLayer.h | 52 + arm_compute/runtime/CL/functions/CLRemap.h | 59 ++ arm_compute/runtime/CL/functions/CLScale.h | 52 + 
arm_compute/runtime/CL/functions/CLScharr3x3.h | 58 ++ arm_compute/runtime/CL/functions/CLSobel3x3.h | 58 ++ arm_compute/runtime/CL/functions/CLSobel5x5.h | 74 ++ arm_compute/runtime/CL/functions/CLSobel7x7.h | 74 ++ arm_compute/runtime/CL/functions/CLSoftmaxLayer.h | 69 ++ arm_compute/runtime/CL/functions/CLTableLookup.h | 47 + arm_compute/runtime/CL/functions/CLThreshold.h | 55 ++ arm_compute/runtime/CL/functions/CLTranspose.h | 50 + arm_compute/runtime/CL/functions/CLWarpAffine.h | 52 + .../runtime/CL/functions/CLWarpPerspective.h | 52 + arm_compute/runtime/CPP/CPPScheduler.h | 73 ++ arm_compute/runtime/Distribution1D.h | 55 ++ arm_compute/runtime/HOG.h | 56 ++ arm_compute/runtime/IFunction.h | 54 ++ arm_compute/runtime/ILutAllocator.h | 84 ++ arm_compute/runtime/IScheduler.h | 55 ++ arm_compute/runtime/ITensorAllocator.h | 93 ++ arm_compute/runtime/Lut.h | 68 ++ arm_compute/runtime/LutAllocator.h | 58 ++ arm_compute/runtime/MultiHOG.h | 58 ++ arm_compute/runtime/MultiImage.h | 96 ++ arm_compute/runtime/NEON/INESimpleFunction.h | 50 + arm_compute/runtime/NEON/NEFunctions.h | 96 ++ arm_compute/runtime/NEON/NEScheduler.h | 33 + .../runtime/NEON/functions/NEAbsoluteDifference.h | 50 + arm_compute/runtime/NEON/functions/NEAccumulate.h | 74 ++ .../runtime/NEON/functions/NEActivationLayer.h | 51 + .../runtime/NEON/functions/NEArithmeticAddition.h | 48 + .../NEON/functions/NEArithmeticSubtraction.h | 48 + .../NEON/functions/NEBatchNormalizationLayer.h | 66 ++ arm_compute/runtime/NEON/functions/NEBitwiseAnd.h | 46 + arm_compute/runtime/NEON/functions/NEBitwiseNot.h | 45 + arm_compute/runtime/NEON/functions/NEBitwiseOr.h | 46 + arm_compute/runtime/NEON/functions/NEBitwiseXor.h | 46 + arm_compute/runtime/NEON/functions/NEBox3x3.h | 58 ++ arm_compute/runtime/NEON/functions/NECannyEdge.h | 97 ++ .../runtime/NEON/functions/NEChannelCombine.h | 58 ++ .../runtime/NEON/functions/NEChannelExtract.h | 56 ++ .../runtime/NEON/functions/NEColorConvert.h | 65 ++ arm_compute/runtime/NEON/functions/NEConvolution.h | 128 +++ .../runtime/NEON/functions/NEConvolutionLayer.h | 115 +++ .../runtime/NEON/functions/NEDepthConcatenate.h | 66 ++ .../runtime/NEON/functions/NEDepthConvert.h | 67 ++ arm_compute/runtime/NEON/functions/NEDerivative.h | 70 ++ arm_compute/runtime/NEON/functions/NEDilate.h | 55 ++ .../NEON/functions/NEDirectConvolutionLayer.h | 72 ++ .../runtime/NEON/functions/NEEqualizeHistogram.h | 77 ++ arm_compute/runtime/NEON/functions/NEErode.h | 55 ++ arm_compute/runtime/NEON/functions/NEFastCorners.h | 80 ++ arm_compute/runtime/NEON/functions/NEFillBorder.h | 58 ++ .../runtime/NEON/functions/NEFullyConnectedLayer.h | 119 +++ arm_compute/runtime/NEON/functions/NEGEMM.h | 78 ++ .../runtime/NEON/functions/NEGEMMInterleave4x4.h | 49 + arm_compute/runtime/NEON/functions/NEGEMMLowp.h | 85 ++ .../runtime/NEON/functions/NEGEMMTranspose1xW.h | 47 + arm_compute/runtime/NEON/functions/NEGaussian3x3.h | 55 ++ arm_compute/runtime/NEON/functions/NEGaussian5x5.h | 71 ++ .../runtime/NEON/functions/NEGaussianPyramid.h | 122 +++ .../runtime/NEON/functions/NEHOGDescriptor.h | 71 ++ arm_compute/runtime/NEON/functions/NEHOGDetector.h | 57 ++ arm_compute/runtime/NEON/functions/NEHOGGradient.h | 72 ++ .../runtime/NEON/functions/NEHOGMultiDetection.h | 105 ++ .../runtime/NEON/functions/NEHarrisCorners.h | 103 ++ arm_compute/runtime/NEON/functions/NEHistogram.h | 63 ++ .../runtime/NEON/functions/NEIntegralImage.h | 45 + .../runtime/NEON/functions/NELaplacianPyramid.h | 85 ++ .../NEON/functions/NELaplacianReconstruct.h | 91 ++ 
.../NEON/functions/NELocallyConnectedLayer.h | 79 ++ arm_compute/runtime/NEON/functions/NEMagnitude.h | 47 + arm_compute/runtime/NEON/functions/NEMeanStdDev.h | 62 ++ arm_compute/runtime/NEON/functions/NEMedian3x3.h | 56 ++ .../runtime/NEON/functions/NEMinMaxLocation.h | 71 ++ .../runtime/NEON/functions/NENonLinearFilter.h | 61 ++ .../NEON/functions/NENonMaximaSuppression3x3.h | 56 ++ .../runtime/NEON/functions/NENormalizationLayer.h | 71 ++ arm_compute/runtime/NEON/functions/NEOpticalFlow.h | 95 ++ arm_compute/runtime/NEON/functions/NEPhase.h | 46 + .../NEON/functions/NEPixelWiseMultiplication.h | 50 + .../runtime/NEON/functions/NEPoolingLayer.h | 52 + arm_compute/runtime/NEON/functions/NERemap.h | 60 ++ arm_compute/runtime/NEON/functions/NEScale.h | 62 ++ arm_compute/runtime/NEON/functions/NEScharr3x3.h | 59 ++ arm_compute/runtime/NEON/functions/NESobel3x3.h | 59 ++ arm_compute/runtime/NEON/functions/NESobel5x5.h | 75 ++ arm_compute/runtime/NEON/functions/NESobel7x7.h | 75 ++ .../runtime/NEON/functions/NESoftmaxLayer.h | 71 ++ arm_compute/runtime/NEON/functions/NETableLookup.h | 47 + arm_compute/runtime/NEON/functions/NEThreshold.h | 54 ++ arm_compute/runtime/NEON/functions/NETranspose.h | 51 + arm_compute/runtime/NEON/functions/NEWarpAffine.h | 52 + .../runtime/NEON/functions/NEWarpPerspective.h | 52 + arm_compute/runtime/OMP/OMPScheduler.h | 68 ++ arm_compute/runtime/Pyramid.h | 76 ++ arm_compute/runtime/Scheduler.h | 77 ++ arm_compute/runtime/SingleThreadScheduler.h | 62 ++ arm_compute/runtime/SubTensor.h | 73 ++ arm_compute/runtime/Tensor.h | 65 ++ arm_compute/runtime/TensorAllocator.h | 90 ++ arm_compute/runtime/Utils.h | 41 + 366 files changed, 34736 insertions(+) create mode 100644 arm_compute/core/AccessWindowAutoPadding.h create mode 100644 arm_compute/core/AccessWindowStatic.h create mode 100644 arm_compute/core/AccessWindowTranspose.h create mode 100644 arm_compute/core/CL/CLHelpers.h create mode 100644 arm_compute/core/CL/CLKernelLibrary.h create mode 100644 arm_compute/core/CL/CLKernels.h create mode 100644 arm_compute/core/CL/CLTypes.h create mode 100644 arm_compute/core/CL/ICLArray.h create mode 100644 arm_compute/core/CL/ICLDistribution1D.h create mode 100644 arm_compute/core/CL/ICLHOG.h create mode 100644 arm_compute/core/CL/ICLKernel.h create mode 100644 arm_compute/core/CL/ICLLut.h create mode 100644 arm_compute/core/CL/ICLMultiHOG.h create mode 100644 arm_compute/core/CL/ICLMultiImage.h create mode 100644 arm_compute/core/CL/ICLSimple2DKernel.h create mode 100644 arm_compute/core/CL/ICLSimple3DKernel.h create mode 100644 arm_compute/core/CL/ICLSimpleKernel.h create mode 100644 arm_compute/core/CL/ICLTensor.h create mode 100644 arm_compute/core/CL/OpenCL.h create mode 100644 arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h create mode 100644 arm_compute/core/CL/kernels/CLAccumulateKernel.h create mode 100644 arm_compute/core/CL/kernels/CLActivationLayerKernel.h create mode 100644 arm_compute/core/CL/kernels/CLArithmeticAdditionKernel.h create mode 100644 arm_compute/core/CL/kernels/CLArithmeticSubtractionKernel.h create mode 100644 arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h create mode 100644 arm_compute/core/CL/kernels/CLBitwiseAndKernel.h create mode 100644 arm_compute/core/CL/kernels/CLBitwiseNotKernel.h create mode 100644 arm_compute/core/CL/kernels/CLBitwiseOrKernel.h create mode 100644 arm_compute/core/CL/kernels/CLBitwiseXorKernel.h create mode 100644 arm_compute/core/CL/kernels/CLBox3x3Kernel.h create mode 100644 
arm_compute/core/CL/kernels/CLCannyEdgeKernel.h create mode 100644 arm_compute/core/CL/kernels/CLChannelCombineKernel.h create mode 100644 arm_compute/core/CL/kernels/CLChannelExtractKernel.h create mode 100644 arm_compute/core/CL/kernels/CLCol2ImKernel.h create mode 100644 arm_compute/core/CL/kernels/CLColorConvertKernel.h create mode 100644 arm_compute/core/CL/kernels/CLConvolutionKernel.h create mode 100644 arm_compute/core/CL/kernels/CLDepthConcatenateKernel.h create mode 100644 arm_compute/core/CL/kernels/CLDepthConvertKernel.h create mode 100644 arm_compute/core/CL/kernels/CLDerivativeKernel.h create mode 100644 arm_compute/core/CL/kernels/CLDilateKernel.h create mode 100644 arm_compute/core/CL/kernels/CLErodeKernel.h create mode 100644 arm_compute/core/CL/kernels/CLFastCornersKernel.h create mode 100644 arm_compute/core/CL/kernels/CLFillBorderKernel.h create mode 100644 arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h create mode 100644 arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h create mode 100644 arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h create mode 100644 arm_compute/core/CL/kernels/CLGEMMMatrixAdditionKernel.h create mode 100644 arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h create mode 100644 arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h create mode 100644 arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h create mode 100644 arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h create mode 100644 arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h create mode 100644 arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h create mode 100644 arm_compute/core/CL/kernels/CLHOGDetectorKernel.h create mode 100644 arm_compute/core/CL/kernels/CLHarrisCornersKernel.h create mode 100644 arm_compute/core/CL/kernels/CLHistogramKernel.h create mode 100644 arm_compute/core/CL/kernels/CLIm2ColKernel.h create mode 100644 arm_compute/core/CL/kernels/CLIntegralImageKernel.h create mode 100644 arm_compute/core/CL/kernels/CLLKTrackerKernel.h create mode 100644 arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h create mode 100644 arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h create mode 100644 arm_compute/core/CL/kernels/CLMeanStdDevKernel.h create mode 100644 arm_compute/core/CL/kernels/CLMedian3x3Kernel.h create mode 100644 arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h create mode 100644 arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h create mode 100644 arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h create mode 100644 arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h create mode 100644 arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h create mode 100644 arm_compute/core/CL/kernels/CLPoolingLayerKernel.h create mode 100644 arm_compute/core/CL/kernels/CLRemapKernel.h create mode 100644 arm_compute/core/CL/kernels/CLScaleKernel.h create mode 100644 arm_compute/core/CL/kernels/CLScharr3x3Kernel.h create mode 100644 arm_compute/core/CL/kernels/CLSobel3x3Kernel.h create mode 100644 arm_compute/core/CL/kernels/CLSobel5x5Kernel.h create mode 100644 arm_compute/core/CL/kernels/CLSobel7x7Kernel.h create mode 100644 arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h create mode 100644 arm_compute/core/CL/kernels/CLTableLookupKernel.h create mode 100644 arm_compute/core/CL/kernels/CLThresholdKernel.h create mode 100644 arm_compute/core/CL/kernels/CLTransposeKernel.h create mode 100644 arm_compute/core/CL/kernels/CLWarpAffineKernel.h create mode 100644 
arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h create mode 100644 arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h create mode 100644 arm_compute/core/CPP/CPPKernels.h create mode 100644 arm_compute/core/CPP/ICPPKernel.h create mode 100644 arm_compute/core/CPP/ICPPSimpleKernel.h create mode 100644 arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h create mode 100644 arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h create mode 100644 arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h create mode 100644 arm_compute/core/Coordinates.h create mode 100644 arm_compute/core/Dimensions.h create mode 100644 arm_compute/core/Error.h create mode 100644 arm_compute/core/FixedPoint.h create mode 100644 arm_compute/core/FixedPoint.inl create mode 100644 arm_compute/core/HOGInfo.h create mode 100644 arm_compute/core/Helpers.h create mode 100644 arm_compute/core/Helpers.inl create mode 100644 arm_compute/core/IAccessWindow.h create mode 100644 arm_compute/core/IArray.h create mode 100644 arm_compute/core/IDistribution.h create mode 100644 arm_compute/core/IDistribution1D.h create mode 100644 arm_compute/core/IHOG.h create mode 100644 arm_compute/core/IKernel.h create mode 100644 arm_compute/core/ILut.h create mode 100644 arm_compute/core/IMultiHOG.h create mode 100644 arm_compute/core/IMultiImage.h create mode 100644 arm_compute/core/IPyramid.h create mode 100644 arm_compute/core/ITensor.h create mode 100644 arm_compute/core/ITensorInfo.h create mode 100644 arm_compute/core/MultiImageInfo.h create mode 100644 arm_compute/core/NEON/INEKernel.h create mode 100644 arm_compute/core/NEON/INESimpleKernel.h create mode 100644 arm_compute/core/NEON/NEColorConvertHelper.inl create mode 100644 arm_compute/core/NEON/NEFixedPoint.h create mode 100644 arm_compute/core/NEON/NEFixedPoint.inl create mode 100644 arm_compute/core/NEON/NEKernels.h create mode 100644 arm_compute/core/NEON/NEMath.h create mode 100644 arm_compute/core/NEON/NEMath.inl create mode 100644 arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEAccumulateKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEActivationLayerKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEBitwiseXorKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEBox3x3Kernel.h create mode 100644 arm_compute/core/NEON/kernels/NECannyEdgeKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEChannelCombineKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEChannelExtractKernel.h create mode 100644 arm_compute/core/NEON/kernels/NECol2ImKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEColorConvertKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEConvolutionKernel.h create mode 100644 arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEDepthConcatenateKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEDepthConvertKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEDerivativeKernel.h create mode 
100644 arm_compute/core/NEON/kernels/NEDilateKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEErodeKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEFastCornersKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEFillArrayKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEFillBorderKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEFillInnerBorderKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h create mode 100644 arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h create mode 100644 arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h create mode 100644 arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEHistogramKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEIm2ColKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEIntegralImageKernel.h create mode 100644 arm_compute/core/NEON/kernels/NELKTrackerKernel.h create mode 100644 arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h create mode 100644 arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h create mode 100644 arm_compute/core/NEON/kernels/NENonLinearFilterKernel.h create mode 100644 arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h create mode 100644 arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h create mode 100644 arm_compute/core/NEON/kernels/NERemapKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEScaleKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEScharr3x3Kernel.h create mode 100644 arm_compute/core/NEON/kernels/NESobel3x3Kernel.h create mode 100644 arm_compute/core/NEON/kernels/NESobel5x5Kernel.h create mode 100644 arm_compute/core/NEON/kernels/NESobel7x7Kernel.h create mode 100644 arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h create mode 100644 arm_compute/core/NEON/kernels/NETableLookupKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEThresholdKernel.h create mode 100644 arm_compute/core/NEON/kernels/NETransposeKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEWarpKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h create mode 100644 arm_compute/core/PixelValue.h create mode 100644 arm_compute/core/PyramidInfo.h create mode 100644 arm_compute/core/Size2D.h create mode 100644 arm_compute/core/Steps.h create 
mode 100644 arm_compute/core/Strides.h create mode 100644 arm_compute/core/SubTensorInfo.h create mode 100644 arm_compute/core/TensorInfo.h create mode 100644 arm_compute/core/TensorShape.h create mode 100644 arm_compute/core/Types.h create mode 100644 arm_compute/core/Utils.h create mode 100644 arm_compute/core/Validate.h create mode 100644 arm_compute/core/Window.h create mode 100644 arm_compute/core/Window.inl create mode 100644 arm_compute/runtime/Array.h create mode 100644 arm_compute/runtime/CL/CLArray.h create mode 100644 arm_compute/runtime/CL/CLDistribution1D.h create mode 100644 arm_compute/runtime/CL/CLFunctions.h create mode 100644 arm_compute/runtime/CL/CLHOG.h create mode 100644 arm_compute/runtime/CL/CLLut.h create mode 100644 arm_compute/runtime/CL/CLLutAllocator.h create mode 100644 arm_compute/runtime/CL/CLMultiHOG.h create mode 100644 arm_compute/runtime/CL/CLMultiImage.h create mode 100644 arm_compute/runtime/CL/CLPyramid.h create mode 100644 arm_compute/runtime/CL/CLScheduler.h create mode 100644 arm_compute/runtime/CL/CLSubTensor.h create mode 100644 arm_compute/runtime/CL/CLTensor.h create mode 100644 arm_compute/runtime/CL/CLTensorAllocator.h create mode 100644 arm_compute/runtime/CL/ICLSimpleFunction.h create mode 100644 arm_compute/runtime/CL/functions/CLAbsoluteDifference.h create mode 100644 arm_compute/runtime/CL/functions/CLAccumulate.h create mode 100644 arm_compute/runtime/CL/functions/CLActivationLayer.h create mode 100644 arm_compute/runtime/CL/functions/CLArithmeticAddition.h create mode 100644 arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h create mode 100644 arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h create mode 100644 arm_compute/runtime/CL/functions/CLBitwiseAnd.h create mode 100644 arm_compute/runtime/CL/functions/CLBitwiseNot.h create mode 100644 arm_compute/runtime/CL/functions/CLBitwiseOr.h create mode 100644 arm_compute/runtime/CL/functions/CLBitwiseXor.h create mode 100644 arm_compute/runtime/CL/functions/CLBox3x3.h create mode 100644 arm_compute/runtime/CL/functions/CLCannyEdge.h create mode 100644 arm_compute/runtime/CL/functions/CLChannelCombine.h create mode 100644 arm_compute/runtime/CL/functions/CLChannelExtract.h create mode 100644 arm_compute/runtime/CL/functions/CLColorConvert.h create mode 100644 arm_compute/runtime/CL/functions/CLConvolution.h create mode 100644 arm_compute/runtime/CL/functions/CLConvolutionLayer.h create mode 100644 arm_compute/runtime/CL/functions/CLDepthConcatenate.h create mode 100644 arm_compute/runtime/CL/functions/CLDepthConvert.h create mode 100644 arm_compute/runtime/CL/functions/CLDerivative.h create mode 100644 arm_compute/runtime/CL/functions/CLDilate.h create mode 100644 arm_compute/runtime/CL/functions/CLEqualizeHistogram.h create mode 100644 arm_compute/runtime/CL/functions/CLErode.h create mode 100644 arm_compute/runtime/CL/functions/CLFastCorners.h create mode 100644 arm_compute/runtime/CL/functions/CLFillBorder.h create mode 100644 arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h create mode 100644 arm_compute/runtime/CL/functions/CLGEMM.h create mode 100644 arm_compute/runtime/CL/functions/CLGEMMInterleave4x4.h create mode 100644 arm_compute/runtime/CL/functions/CLGEMMLowp.h create mode 100644 arm_compute/runtime/CL/functions/CLGaussian3x3.h create mode 100644 arm_compute/runtime/CL/functions/CLGaussian5x5.h create mode 100644 arm_compute/runtime/CL/functions/CLGaussianPyramid.h create mode 100644 arm_compute/runtime/CL/functions/CLHOGDescriptor.h create mode 
100644 arm_compute/runtime/CL/functions/CLHOGDetector.h create mode 100644 arm_compute/runtime/CL/functions/CLHOGGradient.h create mode 100644 arm_compute/runtime/CL/functions/CLHOGMultiDetection.h create mode 100644 arm_compute/runtime/CL/functions/CLHarrisCorners.h create mode 100644 arm_compute/runtime/CL/functions/CLHistogram.h create mode 100644 arm_compute/runtime/CL/functions/CLIntegralImage.h create mode 100644 arm_compute/runtime/CL/functions/CLLaplacianPyramid.h create mode 100644 arm_compute/runtime/CL/functions/CLLaplacianReconstruct.h create mode 100644 arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h create mode 100644 arm_compute/runtime/CL/functions/CLMagnitude.h create mode 100644 arm_compute/runtime/CL/functions/CLMeanStdDev.h create mode 100644 arm_compute/runtime/CL/functions/CLMedian3x3.h create mode 100644 arm_compute/runtime/CL/functions/CLMinMaxLocation.h create mode 100644 arm_compute/runtime/CL/functions/CLNonLinearFilter.h create mode 100644 arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h create mode 100644 arm_compute/runtime/CL/functions/CLNormalizationLayer.h create mode 100644 arm_compute/runtime/CL/functions/CLOpticalFlow.h create mode 100644 arm_compute/runtime/CL/functions/CLPhase.h create mode 100644 arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h create mode 100644 arm_compute/runtime/CL/functions/CLPoolingLayer.h create mode 100644 arm_compute/runtime/CL/functions/CLRemap.h create mode 100644 arm_compute/runtime/CL/functions/CLScale.h create mode 100644 arm_compute/runtime/CL/functions/CLScharr3x3.h create mode 100644 arm_compute/runtime/CL/functions/CLSobel3x3.h create mode 100644 arm_compute/runtime/CL/functions/CLSobel5x5.h create mode 100644 arm_compute/runtime/CL/functions/CLSobel7x7.h create mode 100644 arm_compute/runtime/CL/functions/CLSoftmaxLayer.h create mode 100644 arm_compute/runtime/CL/functions/CLTableLookup.h create mode 100644 arm_compute/runtime/CL/functions/CLThreshold.h create mode 100644 arm_compute/runtime/CL/functions/CLTranspose.h create mode 100644 arm_compute/runtime/CL/functions/CLWarpAffine.h create mode 100644 arm_compute/runtime/CL/functions/CLWarpPerspective.h create mode 100644 arm_compute/runtime/CPP/CPPScheduler.h create mode 100644 arm_compute/runtime/Distribution1D.h create mode 100644 arm_compute/runtime/HOG.h create mode 100644 arm_compute/runtime/IFunction.h create mode 100644 arm_compute/runtime/ILutAllocator.h create mode 100644 arm_compute/runtime/IScheduler.h create mode 100644 arm_compute/runtime/ITensorAllocator.h create mode 100644 arm_compute/runtime/Lut.h create mode 100644 arm_compute/runtime/LutAllocator.h create mode 100644 arm_compute/runtime/MultiHOG.h create mode 100644 arm_compute/runtime/MultiImage.h create mode 100644 arm_compute/runtime/NEON/INESimpleFunction.h create mode 100644 arm_compute/runtime/NEON/NEFunctions.h create mode 100644 arm_compute/runtime/NEON/NEScheduler.h create mode 100644 arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h create mode 100644 arm_compute/runtime/NEON/functions/NEAccumulate.h create mode 100644 arm_compute/runtime/NEON/functions/NEActivationLayer.h create mode 100644 arm_compute/runtime/NEON/functions/NEArithmeticAddition.h create mode 100644 arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h create mode 100644 arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h create mode 100644 arm_compute/runtime/NEON/functions/NEBitwiseAnd.h create mode 100644 arm_compute/runtime/NEON/functions/NEBitwiseNot.h 
create mode 100644 arm_compute/runtime/NEON/functions/NEBitwiseOr.h create mode 100644 arm_compute/runtime/NEON/functions/NEBitwiseXor.h create mode 100644 arm_compute/runtime/NEON/functions/NEBox3x3.h create mode 100644 arm_compute/runtime/NEON/functions/NECannyEdge.h create mode 100644 arm_compute/runtime/NEON/functions/NEChannelCombine.h create mode 100644 arm_compute/runtime/NEON/functions/NEChannelExtract.h create mode 100644 arm_compute/runtime/NEON/functions/NEColorConvert.h create mode 100644 arm_compute/runtime/NEON/functions/NEConvolution.h create mode 100644 arm_compute/runtime/NEON/functions/NEConvolutionLayer.h create mode 100644 arm_compute/runtime/NEON/functions/NEDepthConcatenate.h create mode 100644 arm_compute/runtime/NEON/functions/NEDepthConvert.h create mode 100644 arm_compute/runtime/NEON/functions/NEDerivative.h create mode 100644 arm_compute/runtime/NEON/functions/NEDilate.h create mode 100644 arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h create mode 100644 arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h create mode 100644 arm_compute/runtime/NEON/functions/NEErode.h create mode 100644 arm_compute/runtime/NEON/functions/NEFastCorners.h create mode 100644 arm_compute/runtime/NEON/functions/NEFillBorder.h create mode 100644 arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h create mode 100644 arm_compute/runtime/NEON/functions/NEGEMM.h create mode 100644 arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h create mode 100644 arm_compute/runtime/NEON/functions/NEGEMMLowp.h create mode 100644 arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h create mode 100644 arm_compute/runtime/NEON/functions/NEGaussian3x3.h create mode 100644 arm_compute/runtime/NEON/functions/NEGaussian5x5.h create mode 100644 arm_compute/runtime/NEON/functions/NEGaussianPyramid.h create mode 100644 arm_compute/runtime/NEON/functions/NEHOGDescriptor.h create mode 100644 arm_compute/runtime/NEON/functions/NEHOGDetector.h create mode 100644 arm_compute/runtime/NEON/functions/NEHOGGradient.h create mode 100644 arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h create mode 100644 arm_compute/runtime/NEON/functions/NEHarrisCorners.h create mode 100644 arm_compute/runtime/NEON/functions/NEHistogram.h create mode 100644 arm_compute/runtime/NEON/functions/NEIntegralImage.h create mode 100644 arm_compute/runtime/NEON/functions/NELaplacianPyramid.h create mode 100644 arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h create mode 100644 arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h create mode 100644 arm_compute/runtime/NEON/functions/NEMagnitude.h create mode 100644 arm_compute/runtime/NEON/functions/NEMeanStdDev.h create mode 100644 arm_compute/runtime/NEON/functions/NEMedian3x3.h create mode 100644 arm_compute/runtime/NEON/functions/NEMinMaxLocation.h create mode 100644 arm_compute/runtime/NEON/functions/NENonLinearFilter.h create mode 100644 arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h create mode 100644 arm_compute/runtime/NEON/functions/NENormalizationLayer.h create mode 100644 arm_compute/runtime/NEON/functions/NEOpticalFlow.h create mode 100644 arm_compute/runtime/NEON/functions/NEPhase.h create mode 100644 arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h create mode 100644 arm_compute/runtime/NEON/functions/NEPoolingLayer.h create mode 100644 arm_compute/runtime/NEON/functions/NERemap.h create mode 100644 arm_compute/runtime/NEON/functions/NEScale.h create mode 100644 
arm_compute/runtime/NEON/functions/NEScharr3x3.h create mode 100644 arm_compute/runtime/NEON/functions/NESobel3x3.h create mode 100644 arm_compute/runtime/NEON/functions/NESobel5x5.h create mode 100644 arm_compute/runtime/NEON/functions/NESobel7x7.h create mode 100644 arm_compute/runtime/NEON/functions/NESoftmaxLayer.h create mode 100644 arm_compute/runtime/NEON/functions/NETableLookup.h create mode 100644 arm_compute/runtime/NEON/functions/NEThreshold.h create mode 100644 arm_compute/runtime/NEON/functions/NETranspose.h create mode 100644 arm_compute/runtime/NEON/functions/NEWarpAffine.h create mode 100644 arm_compute/runtime/NEON/functions/NEWarpPerspective.h create mode 100644 arm_compute/runtime/OMP/OMPScheduler.h create mode 100644 arm_compute/runtime/Pyramid.h create mode 100644 arm_compute/runtime/Scheduler.h create mode 100644 arm_compute/runtime/SingleThreadScheduler.h create mode 100644 arm_compute/runtime/SubTensor.h create mode 100644 arm_compute/runtime/Tensor.h create mode 100644 arm_compute/runtime/TensorAllocator.h create mode 100644 arm_compute/runtime/Utils.h

diff --git a/arm_compute/core/AccessWindowAutoPadding.h b/arm_compute/core/AccessWindowAutoPadding.h
new file mode 100644
index 0000000000..0a3344b115
--- /dev/null
+++ b/arm_compute/core/AccessWindowAutoPadding.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_ACCESS_WINDOW_AUTO_PADDING_H__
+#define __ARM_COMPUTE_ACCESS_WINDOW_AUTO_PADDING_H__
+
+#include "arm_compute/core/Coordinates.h"
+#include "arm_compute/core/IAccessWindow.h"
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class Window;
+class ITensorInfo;
+
+/** Dummy access window.
+ *
+ * This implementation always uses the auto padding of the tensor info and
+ * never updates the window. The valid region is always set to cover the entire
+ * tensor.
+ *
+ * @note This access window is only used during the migration to the new
+ *       padding system. It will be removed once all kernels have been ported.
+ */
+class AccessWindowAutoPadding : public IAccessWindow
+{
+public:
+    /** Default constructor.
+     *
+     * @param[in,out] info Tensor info of the accessed kernel.
+     */
+    AccessWindowAutoPadding(ITensorInfo *info);
+    AccessWindowAutoPadding(const AccessWindowAutoPadding &) = delete;
+    AccessWindowAutoPadding &operator=(const AccessWindowAutoPadding &) = delete;
+    AccessWindowAutoPadding(AccessWindowAutoPadding &&) = default;
+    AccessWindowAutoPadding &operator=(AccessWindowAutoPadding &&) = default;
+    ~AccessWindowAutoPadding() = default;
+
+    /** Set the valid region to match the entire tensor. */
+    void set_valid_region();
+
+    /** Return a valid region that spans across the entire tensor. */
+    ValidRegion compute_valid_region() const;
+
+    // Inherited methods overridden:
+    bool update_window_if_needed(Window &window) const override;
+    bool update_padding_if_needed(const Window &window) const override;
+    ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) const override;
+
+private:
+    ITensorInfo *_info;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_ACCESS_WINDOW_AUTO_PADDING_H__*/
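For orientation, a window of this kind is normally created inside a kernel's configure() step. The sketch below is illustrative only: the configure_kernel() wrapper and tensor parameters are hypothetical, and only AccessWindowAutoPadding and its compute_valid_region() overload come from the header above.

```cpp
#include "arm_compute/core/AccessWindowAutoPadding.h"
#include "arm_compute/core/ITensorInfo.h"

using namespace arm_compute;

// Hypothetical configure() fragment: rely on the tensor's auto padding and
// mark the entire output tensor as valid.
void configure_kernel(ITensorInfo *output)
{
    AccessWindowAutoPadding output_access(output); // never changes the execution window

    // For an auto-padding window the valid region always spans the full tensor.
    output->set_valid_region(output_access.compute_valid_region());
}
```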
diff --git a/arm_compute/core/AccessWindowStatic.h b/arm_compute/core/AccessWindowStatic.h
new file mode 100644
index 0000000000..6dcba072c4
--- /dev/null
+++ b/arm_compute/core/AccessWindowStatic.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_IACCESS_WINDOW_STATIC_H__
+#define __ARM_COMPUTE_IACCESS_WINDOW_STATIC_H__
+
+#include "arm_compute/core/Coordinates.h"
+#include "arm_compute/core/IAccessWindow.h"
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+
+#include <array>
+
+namespace arm_compute
+{
+class Window;
+class ITensorInfo;
+
+/** Implementation of a static rectangular access pattern.
+ *
+ * In this implementation the access offsets and sizes are not relative to the
+ * current element. Instead they are considered to be absolute coordinates
+ * within the accessed tensor's shape.
+ */
+class AccessWindowStatic : public IAccessWindow
+{
+public:
+    /** Constructor for a static access pattern.
+     *
+     * @param[in,out] info    Tensor info of the accessed kernel.
+     * @param[in]     start_x Start of the access in X direction.
+     * @param[in]     start_y Start of the access in Y direction.
+     * @param[in]     end_x   End of the access in X direction.
+     * @param[in]     end_y   End of the access in Y direction.
+     */
+    AccessWindowStatic(ITensorInfo *info, int start_x, int start_y, int end_x, int end_y);
+
+    AccessWindowStatic(const AccessWindowStatic &) = delete;
+    AccessWindowStatic &operator=(const AccessWindowStatic &) = delete;
+    AccessWindowStatic(AccessWindowStatic &&) = default;
+    AccessWindowStatic &operator=(AccessWindowStatic &&) = default;
+    ~AccessWindowStatic() = default;
+
+    /** Set the valid region based on the static access pattern and valid
+     * region of the inputs.
+     *
+     * @param[in] window             Execution window of the kernel.
+     * @param[in] input_valid_region Combined valid region of all inputs.
+     */
+    void set_valid_region(const Window &window, const ValidRegion &input_valid_region);
+
+    /** Compute the valid region based on the static access pattern and valid region of the inputs.
+     *
+     * @param[in] window             Execution window of the kernel.
+     * @param[in] input_valid_region Combined valid region of all inputs.
+     */
+    ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region) const;
+
+    // Inherited methods overridden:
+    bool update_window_if_needed(Window &window) const override;
+    bool update_padding_if_needed(const Window &window) const override;
+    ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) const override;
+
+    ITensorInfo *_info;
+    int          _start_x;
+    int          _start_y;
+    int          _end_x;
+    int          _end_y;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_IACCESS_WINDOW_STATIC_H__*/
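As a usage illustration: unlike the relative access windows, the rectangle here is given in absolute tensor coordinates. A minimal sketch, assuming a hypothetical configure() fragment and a one-pixel border invented for the example; the constructor signature is the one documented above.

```cpp
#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/ITensorInfo.h"

using namespace arm_compute;

// Hypothetical configure() fragment: declare an absolute access rectangle
// covering the whole tensor plus a one-pixel border on every side.
void configure_kernel(ITensorInfo *input)
{
    const int width  = static_cast<int>(input->dimension(0));
    const int height = static_cast<int>(input->dimension(1));

    // Coordinates are absolute within the tensor shape, not relative to the
    // element being processed; the window would then typically be passed to
    // the padding/window update helpers.
    AccessWindowStatic input_access(input, -1, -1, width + 1, height + 1);
    (void)input_access;
}
```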
diff --git a/arm_compute/core/AccessWindowTranspose.h b/arm_compute/core/AccessWindowTranspose.h
new file mode 100644
index 0000000000..102860f9d8
--- /dev/null
+++ b/arm_compute/core/AccessWindowTranspose.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_IACCESS_WINDOW_TRANSPOSE_H__
+#define __ARM_COMPUTE_IACCESS_WINDOW_TRANSPOSE_H__
+
+#include "arm_compute/core/Coordinates.h"
+#include "arm_compute/core/IAccessWindow.h"
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class Window;
+class ITensorInfo;
+
+/** Implementation of an XY-transpose access pattern. */
+class AccessWindowTranspose : public AccessWindowRectangle
+{
+public:
+    using AccessWindowRectangle::AccessWindowRectangle;
+    bool update_window_if_needed(Window &window) const override;
+    bool update_padding_if_needed(const Window &window) const override;
+    using AccessWindowRectangle::compute_valid_region;
+    ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) const override;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_IACCESS_WINDOW_TRANSPOSE_H__*/
diff --git a/arm_compute/core/CL/CLHelpers.h b/arm_compute/core/CL/CLHelpers.h
new file mode 100644
index 0000000000..26253e3f38
--- /dev/null
+++ b/arm_compute/core/CL/CLHelpers.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLHELPERS_H__
+#define __ARM_COMPUTE_CLHELPERS_H__
+
+#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/core/Helpers.h"
+
+#include <string>
+
+namespace arm_compute
+{
+enum class DataType;
+enum class GPUTarget;
+
+/** Enable bitwise operations on GPUTarget enumerations */
+template <>
+struct enable_bitwise_ops<arm_compute::GPUTarget>
+{
+    static constexpr bool value = true;
+};
+
+/** Max vector width of an OpenCL vector */
+static constexpr const unsigned int max_cl_vector_width = 16;
+
+/** Translates a tensor data type to the appropriate OpenCL type.
+ *
+ * @param[in] dt @ref DataType to be translated to OpenCL type.
+ *
+ * @return The string specifying the OpenCL type to be used.
+ */
+std::string get_cl_type_from_data_type(const DataType &dt);
+
+/** Translates a given GPU device target to string.
+ *
+ * @param[in] target Given GPU target.
+ *
+ * @return The string describing the target.
+ */
+const std::string &string_from_target(GPUTarget target);
+
+/** Helper function to create and return a unique_ptr pointing to a CL kernel object.
+ *  It also calls the kernel's configuration.
+ *
+ * @param[in] args All the arguments that need to be passed to the kernel's configuration.
+ *
+ * @return A unique pointer pointing to the CL kernel object.
+ */
+template <typename Kernel, typename... T>
+std::unique_ptr<Kernel> create_configure_kernel(T &&... args)
+{
+    std::unique_ptr<Kernel> k = arm_compute::cpp14::make_unique<Kernel>();
+    k->configure(std::forward<T>(args)...);
+    return k;
+}
+
+/** Helper function to create and return a unique_ptr pointing to a CL kernel object.
+ *
+ * @return A unique pointer pointing to the CL kernel object.
+ */
+template <typename Kernel>
+std::unique_ptr<Kernel> create_kernel()
+{
+    std::unique_ptr<Kernel> k = arm_compute::cpp14::make_unique<Kernel>();
+    return k;
+}
+
+/** Helper function to get the GPU target from a CL device.
+ *
+ * @param[in] device A CL device.
+ *
+ * @return The GPU target.
+ */
+GPUTarget get_target_from_device(cl::Device &device);
+
+/** Helper function to get the GPU arch.
+ *
+ * @param[in] target GPU target.
+ *
+ * @return The GPU target representing the architecture.
+ */
+GPUTarget get_arch_from_target(GPUTarget target);
+}
+#endif
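A usage sketch for the two factory helpers above. The kernel type SomeCLKernel and its configure() arguments are hypothetical; only create_configure_kernel() and create_kernel() come from this header.

```cpp
// Hypothetical kernel type with a configure(const ICLTensor *, ICLTensor *) overload.
// One call allocates the kernel and forwards the arguments to configure():
auto k = create_configure_kernel<SomeCLKernel>(&src, &dst);

// Two-step variant when configuration has to happen later:
auto k2 = create_kernel<SomeCLKernel>();
k2->configure(&src, &dst);
```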
diff --git a/arm_compute/core/CL/CLKernelLibrary.h b/arm_compute/core/CL/CLKernelLibrary.h
new file mode 100644
index 0000000000..c29610c252
--- /dev/null
+++ b/arm_compute/core/CL/CLKernelLibrary.h
@@ -0,0 +1,248 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLKERNELLIBRARY_H__
+#define __ARM_COMPUTE_CLKERNELLIBRARY_H__
+
+#include "arm_compute/core/CL/OpenCL.h"
+
+#include <map>
+#include <set>
+#include <string>
+#include <utility>
+
+namespace arm_compute
+{
+/** Program class */
+class Program
+{
+public:
+    /** Default constructor. */
+    Program();
+    /** Construct program from source file.
+     *
+     * @param[in] context CL context used to create the program.
+     * @param[in] name    Program name.
+     * @param[in] source  Program source.
+     */
+    Program(cl::Context context, std::string name, std::string source);
+    /** Construct program from binary file.
+     *
+     * @param[in] context CL context used to create the program.
+     * @param[in] device  CL device for which the programs are created.
+     * @param[in] name    Program name.
+     * @param[in] binary  Program binary.
+     */
+    Program(cl::Context context, cl::Device device, std::string name, std::vector<unsigned char> binary);
+    /** Default Copy Constructor. */
+    Program(const Program &) = default;
+    /** Default Move Constructor. */
+    Program(Program &&) = default;
+    /** Default copy assignment operator. */
+    Program &operator=(const Program &) = default;
+    /** Default move assignment operator. */
+    Program &operator=(Program &&) = default;
+    /** Returns program name.
+     *
+     * @return Program's name.
+     */
+    std::string name() const
+    {
+        return _name;
+    }
+    /** User-defined conversion to the underlying CL program.
+     *
+     * @return The CL program object.
+     */
+    explicit operator cl::Program() const;
+
+    /** Build the given CL program.
+     *
+     * @param[in] program       The CL program to build.
+     * @param[in] build_options (Optional) Options used to build the program.
+     *
+     * @return True if the build succeeded.
+     */
+    static bool build(const cl::Program &program, const std::string &build_options = "");
+    /** Build the underlying CL program.
+     *
+     * @param[in] build_options Options used to build the CL program.
+     *
+     * @return The built CL program.
+     */
+    cl::Program build(const std::string &build_options = "") const;
+
+private:
+    cl::Context                _context;   /**< Underlying CL context. */
+    cl::Device                 _device;    /**< CL device for which the programs are created. */
+    bool                       _is_binary; /**< Create program from binary? */
+    std::string                _name;      /**< Program name. */
+    std::string                _source;    /**< Source code for the program. */
+    std::vector<unsigned char> _binary;    /**< Binary from which to create the program. */
+};
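A short, hypothetical use of the Program wrapper. The program name, source string, and build option are invented for the example; the constructor and the two build() overloads are the ones declared above.

```cpp
// Wrap OpenCL C source in a Program, then compile it with extra options.
cl::Context context = cl::Context::getDefault();
Program     program(context, "example_program", source_str); // source_str holds OpenCL C code
cl::Program built = program.build("-DDATA_TYPE=float");      // returns the built cl::Program
```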
diff --git a/arm_compute/core/CL/CLKernelLibrary.h b/arm_compute/core/CL/CLKernelLibrary.h
new file mode 100644
index 0000000000..c29610c252
--- /dev/null
+++ b/arm_compute/core/CL/CLKernelLibrary.h
@@ -0,0 +1,248 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLKERNELLIBRARY_H__
+#define __ARM_COMPUTE_CLKERNELLIBRARY_H__
+
+#include "arm_compute/core/CL/OpenCL.h"
+
+#include <map>
+#include <set>
+#include <string>
+#include <utility>
+
+namespace arm_compute
+{
+/** Program class */
+class Program
+{
+public:
+    /** Default constructor. */
+    Program();
+    /** Construct program from source file.
+     *
+     * @param[in] context CL context used to create the program.
+     * @param[in] name    Program name.
+     * @param[in] source  Program source.
+     */
+    Program(cl::Context context, std::string name, std::string source);
+    /** Construct program from binary file.
+     *
+     * @param[in] context CL context used to create the program.
+     * @param[in] device  CL device for which the programs are created.
+     * @param[in] name    Program name.
+     * @param[in] binary  Program binary.
+     */
+    Program(cl::Context context, cl::Device device, std::string name, std::vector<unsigned char> binary);
+    /** Default Copy Constructor. */
+    Program(const Program &) = default;
+    /** Default Move Constructor. */
+    Program(Program &&) = default;
+    /** Default copy assignment operator. */
+    Program &operator=(const Program &) = default;
+    /** Default move assignment operator. */
+    Program &operator=(Program &&) = default;
+    /** Returns program name.
+     *
+     * @return Program's name.
+     */
+    std::string name() const
+    {
+        return _name;
+    }
+    /** User-defined conversion to the underlying CL program.
+     *
+     * @return The CL program object.
+     */
+    explicit operator cl::Program() const;
+    /** Build the given CL program.
+     *
+     * @param[in] program       The CL program to build.
+     * @param[in] build_options Options used to build the program.
+     *
+     * @return True if the program was built successfully.
+     */
+    static bool build(const cl::Program &program, const std::string &build_options = "");
+    /** Build the underlying CL program.
+     *
+     * @param[in] build_options Options used to build the CL program.
+     *
+     * @return A reference to itself.
+     */
+    cl::Program build(const std::string &build_options = "") const;
+
+private:
+    cl::Context _context;               /**< Underlying CL context. */
+    cl::Device  _device;                /**< CL device for which the programs are created. */
+    bool        _is_binary;             /**< Create program from binary? */
+    std::string _name;                  /**< Program name. */
+    std::string _source;                /**< Source code for the program. */
+    std::vector<unsigned char> _binary; /**< Binary from which to create the program. */
+};
+
+/** Kernel class */
+class Kernel
+{
+public:
+    /** Default Constructor. */
+    Kernel();
+    /** Default Copy Constructor. */
+    Kernel(const Kernel &) = default;
+    /** Default Move Constructor. */
+    Kernel(Kernel &&) = default;
+    /** Default copy assignment operator. */
+    Kernel &operator=(const Kernel &) = default;
+    /** Default move assignment operator. */
+    Kernel &operator=(Kernel &&) = default;
+    /** Constructor.
+     *
+     * @param[in] name    Kernel name.
+     * @param[in] program Built program.
+     */
+    Kernel(std::string name, const cl::Program &program);
+    /** Returns kernel name.
+     *
+     * @return Kernel's name.
+     */
+    std::string name() const
+    {
+        return _name;
+    }
+    /** Returns OpenCL kernel.
+     *
+     * @return OpenCL Kernel.
+     */
+    explicit operator cl::Kernel() const
+    {
+        return _kernel;
+    }
+
+private:
+    std::string _name;   /**< Kernel name */
+    cl::Kernel  _kernel; /**< OpenCL Kernel */
+};
+
+/** CLKernelLibrary class */
+class CLKernelLibrary
+{
+    using StringSet = std::set<std::string>;
+
+private:
+    /** Default Constructor. */
+    CLKernelLibrary();
+
+public:
+    /** Prevent instances of this class from being copied. */
+    CLKernelLibrary(const CLKernelLibrary &) = delete;
+    /** Prevent instances of this class from being copied. */
+    const CLKernelLibrary &operator=(const CLKernelLibrary &) = delete;
+    /** Access the KernelLibrary singleton.
+     *
+     * @return The KernelLibrary instance.
+     */
+    static CLKernelLibrary &get();
+    /** Initialises the kernel library.
+     *
+     * @param[in] kernel_path (Optional) Path of the directory from which kernel sources are loaded.
+     * @param[in] context     (Optional) CL context used to create programs.
+     * @param[in] device      (Optional) CL device for which the programs are created.
+     */
+    void init(std::string kernel_path = ".", cl::Context context = cl::Context::getDefault(), cl::Device device = cl::Device::getDefault())
+    {
+        _kernel_path = std::move(kernel_path);
+        _context     = std::move(context);
+        _device      = std::move(device);
+    }
+    /** Sets the path that the kernels reside in.
+     *
+     * @param[in] kernel_path Path of the kernel.
+     */
+    void set_kernel_path(const std::string &kernel_path)
+    {
+        _kernel_path = kernel_path;
+    };
+    /** Sets the CL context used to create programs.
+     *
+     * @note Setting the context also resets the device to the
+     *       first one available in the new context.
+     *
+     * @param[in] context A CL context.
+     */
+    void set_context(cl::Context context)
+    {
+        _context = std::move(context);
+
+        const auto cl_devices = _context.getInfo<CL_CONTEXT_DEVICES>();
+
+        if(cl_devices.empty())
+        {
+            _device = cl::Device();
+        }
+        else
+        {
+            _device = cl_devices[0];
+        }
+    };
+    /** Sets the CL device for which the programs are created.
+     *
+     * @param[in] device A CL device.
+     */
+    void set_device(cl::Device device)
+    {
+        _device = std::move(device);
+    };
+    /** Creates a kernel from the kernel library.
+     *
+     * @param[in] kernel_name       Kernel name.
+     * @param[in] build_options_set Kernel build options as a set.
+     *
+     * @return The created kernel.
+     */
+    Kernel create_kernel(const std::string &kernel_name, const StringSet &build_options_set = {}) const;
+    /** Serializes and saves programs to a binary. */
+    void save_binary();
+    /** Loads a serialized binary with all the programs. */
+    void load_binary();
+
+private:
+    /** Load program and its dependencies.
+     *
+     * @param[in] program_name Name of the program to load.
+     */
+    const Program &load_program(const std::string &program_name) const;
+    /** Concatenates contents of a set into a single string.
+     *
+     * @param[in] s Input set to concatenate.
+     *
+     * @return Concatenated string.
+     */
+    std::string stringify_set(const StringSet &s) const;
+
+    cl::Context _context;     /**< Underlying CL context. */
+    cl::Device  _device;      /**< Underlying CL device. */
+    std::string _kernel_path; /**< Path to the kernels folder. */
+    mutable std::map<std::string, const Program> _programs_map;      /**< Map with all already loaded program data. */
+    mutable std::map<std::string, cl::Program> _built_programs_map;  /**< Map with all already built program data. */
+    static const std::map<std::string, std::string> _kernel_program_map; /**< Map that associates kernel names with programs. */
+    static const std::map<std::string, std::string> _program_source_map; /**< Contains sources for all programs.
+                                                                              Used for compile-time kernel inclusion. */
+};
+}
+#endif /* __ARM_COMPUTE_CLKERNELLIBRARY_H__ */
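A minimal usage sketch for the singleton above; the kernel path, kernel name and build option are illustrative, not taken from this patch:

    #include "arm_compute/core/CL/CLKernelLibrary.h"

    void setup_cl_kernels()
    {
        using arm_compute::CLKernelLibrary;
        // Context and device fall back to cl::Context::getDefault() / cl::Device::getDefault().
        CLKernelLibrary::get().init("./cl_kernels/");
        // Build options are passed as a set of strings:
        arm_compute::Kernel k   = CLKernelLibrary::get().create_kernel("example_kernel", { "-DDATA_TYPE=float" });
        cl::Kernel         cl_k = static_cast<cl::Kernel>(k); // via the explicit conversion operator
    }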
diff --git a/arm_compute/core/CL/CLKernels.h b/arm_compute/core/CL/CLKernels.h
new file mode 100644
index 0000000000..0e9f356e52
--- /dev/null
+++ b/arm_compute/core/CL/CLKernels.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLKERNELS_H__
+#define __ARM_COMPUTE_CLKERNELS_H__
+
+/* Header regrouping all the CL kernels */
+#include "arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h"
+#include "arm_compute/core/CL/kernels/CLAccumulateKernel.h"
+#include "arm_compute/core/CL/kernels/CLActivationLayerKernel.h"
+#include "arm_compute/core/CL/kernels/CLArithmeticAdditionKernel.h"
+#include "arm_compute/core/CL/kernels/CLArithmeticSubtractionKernel.h"
+#include "arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h"
+#include "arm_compute/core/CL/kernels/CLBitwiseAndKernel.h"
+#include "arm_compute/core/CL/kernels/CLBitwiseNotKernel.h"
+#include "arm_compute/core/CL/kernels/CLBitwiseOrKernel.h"
+#include "arm_compute/core/CL/kernels/CLBitwiseXorKernel.h"
+#include "arm_compute/core/CL/kernels/CLBox3x3Kernel.h"
+#include "arm_compute/core/CL/kernels/CLCannyEdgeKernel.h"
+#include "arm_compute/core/CL/kernels/CLChannelCombineKernel.h"
+#include "arm_compute/core/CL/kernels/CLChannelExtractKernel.h"
+#include "arm_compute/core/CL/kernels/CLCol2ImKernel.h"
+#include "arm_compute/core/CL/kernels/CLColorConvertKernel.h"
+#include "arm_compute/core/CL/kernels/CLConvolutionKernel.h"
+#include "arm_compute/core/CL/kernels/CLDepthConcatenateKernel.h"
+#include "arm_compute/core/CL/kernels/CLDepthConvertKernel.h"
+#include "arm_compute/core/CL/kernels/CLDerivativeKernel.h"
+#include "arm_compute/core/CL/kernels/CLDilateKernel.h"
+#include "arm_compute/core/CL/kernels/CLErodeKernel.h"
+#include "arm_compute/core/CL/kernels/CLFastCornersKernel.h"
+#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
+#include "arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h"
+#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h"
+#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h"
+#include "arm_compute/core/CL/kernels/CLGEMMMatrixAdditionKernel.h"
+#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
+#include "arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h"
+#include "arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h"
+#include "arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h"
+#include "arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h"
+#include "arm_compute/core/CL/kernels/CLHarrisCornersKernel.h"
+#include "arm_compute/core/CL/kernels/CLHistogramKernel.h"
+#include "arm_compute/core/CL/kernels/CLIm2ColKernel.h"
+#include "arm_compute/core/CL/kernels/CLIntegralImageKernel.h"
+#include "arm_compute/core/CL/kernels/CLLKTrackerKernel.h"
+#include "arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h"
+#include "arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h"
+#include "arm_compute/core/CL/kernels/CLMeanStdDevKernel.h"
+#include "arm_compute/core/CL/kernels/CLMedian3x3Kernel.h"
+#include "arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h"
+#include "arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h"
+#include "arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h"
+#include "arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h"
+#include "arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h"
+#include "arm_compute/core/CL/kernels/CLPoolingLayerKernel.h"
+#include "arm_compute/core/CL/kernels/CLRemapKernel.h"
+#include "arm_compute/core/CL/kernels/CLScaleKernel.h"
+#include "arm_compute/core/CL/kernels/CLScharr3x3Kernel.h"
+#include "arm_compute/core/CL/kernels/CLSobel3x3Kernel.h"
+#include "arm_compute/core/CL/kernels/CLSobel5x5Kernel.h"
+#include "arm_compute/core/CL/kernels/CLSobel7x7Kernel.h"
+#include "arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h"
+#include "arm_compute/core/CL/kernels/CLTableLookupKernel.h"
+#include "arm_compute/core/CL/kernels/CLThresholdKernel.h"
+#include "arm_compute/core/CL/kernels/CLTransposeKernel.h"
+#include "arm_compute/core/CL/kernels/CLWarpAffineKernel.h"
+#include "arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h"
+#include "arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h"
+
+#endif /* __ARM_COMPUTE_CLKERNELS_H__ */
diff --git a/arm_compute/core/CL/CLTypes.h b/arm_compute/core/CL/CLTypes.h
new file mode 100644
index 0000000000..c5643d8939
--- /dev/null
+++ b/arm_compute/core/CL/CLTypes.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CL_TYPES_H__
+#define __ARM_COMPUTE_CL_TYPES_H__
+
+namespace arm_compute
+{
+/** Available GPU Targets */
+enum class GPUTarget
+{
+    GPU_ARCH_MASK = 0xF00,
+    MIDGARD       = 0x100,
+    BIFROST       = 0x200,
+    T600          = 0x110,
+    T700          = 0x120,
+    T800          = 0x130,
+    G70           = 0x210
+};
+}
+#endif /* __ARM_COMPUTE_CL_TYPES_H__ */
diff --git a/arm_compute/core/CL/ICLArray.h b/arm_compute/core/CL/ICLArray.h
new file mode 100644
index 0000000000..1b676ed5a3
--- /dev/null
+++ b/arm_compute/core/CL/ICLArray.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_ICLARRAY_H__
+#define __ARM_COMPUTE_ICLARRAY_H__
+
+#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/core/IArray.h"
+#include "arm_compute/core/ITensor.h"
+
+namespace arm_compute
+{
+/** Interface for OpenCL Array */
+template <class T>
+class ICLArray : public IArray<T>
+{
+public:
+    /** Constructor */
+    explicit ICLArray(size_t max_num_values)
+        : IArray<T>(max_num_values), _mapping(nullptr)
+    {
+    }
+
+    ICLArray(const ICLArray &) = delete;
+    ICLArray &operator=(const ICLArray &) = delete;
+    virtual ~ICLArray() = default;
+    /** Interface to be implemented by the child class to return a reference to the OpenCL buffer containing the array's data.
+     *
+     * @return A reference to an OpenCL buffer containing the array's data.
+     */
+    virtual const cl::Buffer &cl_buffer() const = 0;
+    /** Enqueue a map operation of the allocated buffer on the given queue.
+     *
+     * @param[in,out] q        The CL command queue to use for the mapping operation.
+     * @param[in]     blocking If true, then the mapping will be ready to use by the time
+     *                         this method returns, else it is the caller's responsibility
+     *                         to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer.
+     */
+    void map(cl::CommandQueue &q, bool blocking = true)
+    {
+        _mapping = do_map(q, blocking);
+    }
+    /** Enqueue an unmap operation of the allocated and mapped buffer on the given queue.
+     *
+     * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before
+     *       the memory is accessed by the device.
+     *
+     * @param[in,out] q The CL command queue to use for the mapping operation.
+     */
+    void unmap(cl::CommandQueue &q)
+    {
+        do_unmap(q, _mapping);
+        _mapping = nullptr;
+    }
+
+    // Inherited methods overridden:
+    T *buffer() const override
+    {
+        return reinterpret_cast<T *>(_mapping);
+    }
+
+protected:
+    /** Method to be implemented by the child class to map the OpenCL buffer
+     *
+     * @param[in,out] q        The CL command queue to use for the mapping operation.
+     * @param[in]     blocking If true, then the mapping will be ready to use by the time
+     *                         this method returns, else it is the caller's responsibility
+     *                         to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer.
+     */
+    virtual uint8_t *do_map(cl::CommandQueue &q, bool blocking) = 0;
+    /** Method to be implemented by the child class to unmap the OpenCL buffer
+     *
+     * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before
+     *       the memory is accessed by the device.
+     *
+     * @param[in,out] q       The CL command queue to use for the mapping operation.
+     * @param[in]     mapping Pointer to the buffer to be unmapped.
+     */
+    virtual void do_unmap(cl::CommandQueue &q, uint8_t *mapping) = 0;
+
+private:
+    uint8_t *_mapping;
+};
+
+using ICLKeyPointArray        = ICLArray<KeyPoint>;
+using ICLCoordinates2DArray   = ICLArray<Coordinates2D>;
+using ICLDetectionWindowArray = ICLArray<DetectionWindow>;
+using ICLSize2DArray          = ICLArray<Size2D>;
+using ICLUInt8Array           = ICLArray<cl_uchar>;
+using ICLUInt16Array          = ICLArray<cl_ushort>;
+using ICLUInt32Array          = ICLArray<cl_uint>;
+using ICLInt16Array           = ICLArray<cl_short>;
+using ICLInt32Array           = ICLArray<cl_int>;
+using ICLFloatArray           = ICLArray<cl_float>;
+}
+#endif /*__ARM_COMPUTE_ICLARRAY_H__*/
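The map()/unmap() contract above is the usual host-access pattern; a sketch, assuming an allocated ICLKeyPointArray, an in-order command queue, and IArray's num_values() for the element count:

    void read_back(arm_compute::ICLKeyPointArray &array, cl::CommandQueue &queue)
    {
        array.map(queue, true); // blocking: pointer is valid on return
        arm_compute::KeyPoint *pts = array.buffer();
        // ... host-side work on pts[0 .. array.num_values() - 1] ...
        array.unmap(queue);     // only enqueued ...
        queue.finish();         // ... so flush/wait before the device touches the buffer again
    }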
diff --git a/arm_compute/core/CL/ICLDistribution1D.h b/arm_compute/core/CL/ICLDistribution1D.h
new file mode 100644
index 0000000000..8fbbbbf548
--- /dev/null
+++ b/arm_compute/core/CL/ICLDistribution1D.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_ICLDISTRIBUTION1D_H__
+#define __ARM_COMPUTE_ICLDISTRIBUTION1D_H__
+
+#include "arm_compute/core/IDistribution1D.h"
+
+#include <cstddef>
+#include <cstdint>
+
+namespace cl
+{
+class Buffer;
+class CommandQueue;
+}
+
+namespace arm_compute
+{
+/** ICLDistribution1D interface class */
+class ICLDistribution1D : public IDistribution1D
+{
+public:
+    /** Constructor: Creates a 1D CLDistribution of a consecutive interval [offset, offset + range - 1]
+     *  defined by a start offset and valid range, divided equally into num_bins parts.
+     *
+     * @param[in] num_bins The number of bins the distribution is divided in.
+     * @param[in] offset   The start of the values to use.
+     * @param[in] range    The total number of the consecutive values of the distribution interval.
+     */
+    ICLDistribution1D(size_t num_bins, int32_t offset, uint32_t range);
+    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    ICLDistribution1D(const ICLDistribution1D &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    const ICLDistribution1D &operator=(const ICLDistribution1D &) = delete;
+    /** Enqueue a map operation of the allocated buffer on the given queue.
+     *
+     * @param[in,out] q        The CL command queue to use for the mapping operation.
+     * @param[in]     blocking If true, then the mapping will be ready to use by the time
+     *                         this method returns, else it is the caller's responsibility
+     *                         to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer.
+ */ + void map(cl::CommandQueue &q, bool blocking = true); + /** Enqueue an unmap operation of the allocated and mapped buffer on the given queue. + * + * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before + * the memory is accessed by the device. + * + * @param[in,out] q The CL command queue to use for the mapping operation. + */ + void unmap(cl::CommandQueue &q); + /** Interface to be implemented by the child class to return a reference to the OpenCL buffer containing the distribution's data. + * + * @return A reference to an OpenCL buffer containing the distribution's data. + */ + virtual cl::Buffer &cl_buffer() = 0; + // Inherited methods overridden: + uint32_t *buffer() const override; + +protected: + /** Method to be implemented by the child class to map the OpenCL buffer + * + * @param[in,out] q The CL command queue to use for the mapping operation. + * @param[in] blocking If true, then the mapping will be ready to use by the time + * this method returns, else it is the caller's responsibility + * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer. + */ + virtual uint32_t *do_map(cl::CommandQueue &q, bool blocking) = 0; + /** Method to be implemented by the child class to unmap the OpenCL buffer + * + * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before + * the memory is accessed by the device. + * + * @param[in,out] q The CL command queue to use for the mapping operation. + */ + virtual void do_unmap(cl::CommandQueue &q) = 0; + +protected: + uint32_t *_mapping; /**< The distribution data. */ +}; +} +#endif /* __ARM_COMPUTE_ICLDISTRIBUTION1D_H__ */ diff --git a/arm_compute/core/CL/ICLHOG.h b/arm_compute/core/CL/ICLHOG.h new file mode 100644 index 0000000000..a3d2fb4a57 --- /dev/null +++ b/arm_compute/core/CL/ICLHOG.h @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+#ifndef __ARM_COMPUTE_ICLHOG_H__
+#define __ARM_COMPUTE_ICLHOG_H__
+
+#include "arm_compute/core/IHOG.h"
+
+#include <cstdint>
+
+namespace cl
+{
+class Buffer;
+class CommandQueue;
+}
+
+namespace arm_compute
+{
+/** Interface for OpenCL HOG data-object */
+class ICLHOG : public IHOG
+{
+public:
+    /** Default constructor */
+    ICLHOG();
+    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    ICLHOG(const ICLHOG &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    ICLHOG &operator=(const ICLHOG &) = delete;
+    /** Allow instances of this class to be moved */
+    ICLHOG(ICLHOG &&) = default;
+    /** Allow instances of this class to be moved */
+    ICLHOG &operator=(ICLHOG &&) = default;
+    /** Default destructor */
+    virtual ~ICLHOG() = default;
+
+    /** Interface to be implemented by the child class to return a reference to the OpenCL buffer containing the hog's descriptor
+     *
+     * @return A reference to an OpenCL buffer containing the hog's descriptor
+     */
+    virtual const cl::Buffer &cl_buffer() const = 0;
+
+    /** Enqueue a map operation of the allocated buffer on the given queue.
+     *
+     * @param[in,out] q        The CL command queue to use for the mapping operation.
+     * @param[in]     blocking If true, then the mapping will be ready to use by the time
+     *                         this method returns, else it is the caller's responsibility
+     *                         to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer.
+     */
+    void map(cl::CommandQueue &q, bool blocking = true);
+
+    /** Enqueue an unmap operation of the allocated and mapped buffer on the given queue.
+     *
+     * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before
+     *       the memory is accessed by the device.
+     *
+     * @param[in,out] q The CL command queue to use for the mapping operation.
+     */
+    void unmap(cl::CommandQueue &q);
+
+    /** Interface to be implemented by the child class to free the allocated cl buffer.
+     *
+     * @warning The buffer must have been allocated previously. Otherwise calling the function will fail.
+     */
+    virtual void free() = 0;
+
+    // Inherited methods overridden:
+    float *descriptor() const override;
+
+protected:
+    /** Method to be implemented by the child class to map the OpenCL buffer
+     *
+     * @param[in,out] q        The CL command queue to use for the mapping operation.
+     * @param[in]     blocking If true, then the mapping will be ready to use by the time
+     *                         this method returns, else it is the caller's responsibility
+     *                         to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer.
+     */
+    virtual uint8_t *do_map(cl::CommandQueue &q, bool blocking) = 0;
+    /** Method to be implemented by the child class to unmap the OpenCL buffer
+     *
+     * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before
+     *       the memory is accessed by the device.
+     *
+     * @param[in,out] q The CL command queue to use for the mapping operation.
+ */ + virtual void do_unmap(cl::CommandQueue &q) = 0; + +private: + uint8_t *_mapping; +}; +} +#endif /*__ARM_COMPUTE_ICLHOG_H__ */ diff --git a/arm_compute/core/CL/ICLKernel.h b/arm_compute/core/CL/ICLKernel.h new file mode 100644 index 0000000000..72c963d11b --- /dev/null +++ b/arm_compute/core/CL/ICLKernel.h @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_ICLKERNEL_H__ +#define __ARM_COMPUTE_ICLKERNEL_H__ + +#include "arm_compute/core/CL/CLTypes.h" +#include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/core/IKernel.h" + +namespace arm_compute +{ +class ICLTensor; +class Window; + +/** Common interface for all the OpenCL kernels */ +class ICLKernel : public IKernel +{ +public: + /** Constructor */ + ICLKernel(); + /** Returns a reference to the OpenCL kernel of this object. + * + * @return A reference to the OpenCL kernel of this object. + */ + cl::Kernel &kernel(); + /** Add the passed 1D tensor's parameters to the object's kernel's arguments starting from the index idx. + * + * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set. + * @param[in] tensor Tensor to set as an argument of the object's kernel. + * @param[in] window Window the kernel will be executed on. + */ + void add_1D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window); + /** Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx. + * + * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set. + * @param[in] tensor Tensor to set as an argument of the object's kernel. + * @param[in] window Window the kernel will be executed on. + */ + void add_2D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window); + /** Add the passed 3D tensor's parameters to the object's kernel's arguments starting from the index idx. + * + * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set. + * @param[in] tensor Tensor to set as an argument of the object's kernel. + * @param[in] window Window the kernel will be executed on. 
+     */
+    void add_3D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window);
+    /** Returns the number of arguments enqueued per 1D tensor object.
+     *
+     * @return The number of arguments enqueued per 1D tensor object.
+     */
+    unsigned int num_arguments_per_1D_tensor() const;
+    /** Returns the number of arguments enqueued per 2D tensor object.
+     *
+     * @return The number of arguments enqueued per 2D tensor object.
+     */
+    unsigned int num_arguments_per_2D_tensor() const;
+    /** Returns the number of arguments enqueued per 3D tensor object.
+     *
+     * @return The number of arguments enqueued per 3D tensor object.
+     */
+    unsigned int num_arguments_per_3D_tensor() const;
+    /** Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue.
+     *
+     * @note The queue is *not* flushed by this method, and therefore the kernel will not have been executed by the time this method returns.
+     *
+     * @param[in]     window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
+     * @param[in,out] queue  Command queue on which to enqueue the kernel.
+     */
+    virtual void run(const Window &window, cl::CommandQueue &queue) = 0;
+    /** Add the passed parameters to the object's kernel's arguments starting from the index idx.
+     *
+     * @param[in,out] idx   Index at which to start adding the arguments. Will be incremented by the number of kernel arguments set.
+     * @param[in]     value Value to set as an argument of the object's kernel.
+     */
+    template <typename T>
+    void add_argument(unsigned int &idx, T value)
+    {
+        _kernel.setArg(idx++, value);
+    }
+
+    /** Set the targeted GPU architecture
+     *
+     * @param[in] target The targeted GPU architecture
+     */
+    void set_target(GPUTarget target);
+
+    /** Set the targeted GPU architecture according to the CL device
+     *
+     * @param[in] device A CL device
+     */
+    void set_target(cl::Device &device);
+
+    /** Get the targeted GPU architecture
+     *
+     * @return The targeted GPU architecture.
+     */
+    GPUTarget get_target() const;
+
+private:
+    /** Add the passed tensor's parameters to the object's kernel's arguments starting from the index idx.
+     *
+     * @param[in,out] idx    Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
+     * @param[in]     tensor Tensor to set as an argument of the object's kernel.
+     * @param[in]     window Window the kernel will be executed on.
+     */
+    template <unsigned int dimension_size>
+    void add_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window);
+    /** Returns the number of arguments enqueued per tensor object.
+     *
+     * @return The number of arguments enqueued per tensor object.
+     */
+    template <unsigned int dimension_size>
+    unsigned int num_arguments_per_tensor() const;
+
+protected:
+    cl::Kernel  _kernel;   /**< OpenCL kernel to run */
+    cl::NDRange _lws_hint; /**< Local workgroup size hint for the OpenCL kernel */
+    GPUTarget   _target;   /**< The targeted GPU */
+};
+
+/** Add the kernel to the command queue with the given window.
+ *
+ * @note Depending on the size of the window, this might translate into several jobs being enqueued.
+ *
+ * @note If kernel->kernel() is empty then the function will return without adding anything to the queue.
+ *
+ * @param[in,out] queue    OpenCL command queue.
+ * @param[in]     kernel   Kernel to enqueue
+ * @param[in]     window   Window the kernel has to process.
+ * @param[in]     lws_hint Local workgroup size requested, by default (128,1)
+ *
+ * @note If any dimension of the lws is greater than the global workgroup size then no lws will be passed.
+ */
+void enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, const cl::NDRange &lws_hint = cl::Range_128_1);
+}
+#endif /*__ARM_COMPUTE_ICLKERNEL_H__ */
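To illustrate how the argument helpers, _lws_hint and enqueue() combine, a hypothetical run() override for a one-input/one-output 2D kernel (CLExampleKernel and its _input/_output members are assumptions; the slicing helpers come from Window):

    void CLExampleKernel::run(const arm_compute::Window &window, cl::CommandQueue &queue)
    {
        arm_compute::Window slice = window.first_slice_window_2D();
        do
        {
            unsigned int idx = 0; // kernel arguments are packed starting at index 0
            add_2D_tensor_argument(idx, _input, slice);
            add_2D_tensor_argument(idx, _output, slice);
            arm_compute::enqueue(queue, *this, slice, _lws_hint);
        }
        while(window.slide_window_slice_2D(slice));
    }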
diff --git a/arm_compute/core/CL/ICLLut.h b/arm_compute/core/CL/ICLLut.h
new file mode 100644
index 0000000000..2016ebb5c3
--- /dev/null
+++ b/arm_compute/core/CL/ICLLut.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_ICLLUT_H__
+#define __ARM_COMPUTE_ICLLUT_H__
+
+#include "arm_compute/core/ILut.h"
+
+#include <cstdint>
+
+namespace cl
+{
+class Buffer;
+class CommandQueue;
+}
+
+namespace arm_compute
+{
+/** Interface for OpenCL LUT */
+class ICLLut : public ILut
+{
+public:
+    ICLLut();
+    ICLLut(const ICLLut &) = delete;
+    ICLLut &operator=(const ICLLut &) = delete;
+
+    /** Interface to be implemented by the child class to return a reference to the OpenCL buffer containing the lut's data.
+     *
+     * @return A reference to an OpenCL buffer containing the lut's data.
+     */
+    virtual const cl::Buffer &cl_buffer() const = 0;
+    /** Enqueue a map operation of the allocated buffer on the given queue.
+     *
+     * @param[in,out] q        The CL command queue to use for the mapping operation.
+     * @param[in]     blocking If true, then the mapping will be ready to use by the time
+     *                         this method returns, else it is the caller's responsibility
+     *                         to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer.
+     */
+    void map(cl::CommandQueue &q, bool blocking = true);
+    /** Enqueue an unmap operation of the allocated and mapped buffer on the given queue.
+     *
+     * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before
+     *       the memory is accessed by the device.
+     *
+     * @param[in,out] q The CL command queue to use for the mapping operation.
+     */
+    void unmap(cl::CommandQueue &q);
+
+    // Inherited methods overridden:
+    uint8_t *buffer() const override;
+
+protected:
+    /** Method to be implemented by the child class to map the OpenCL buffer
+     *
+     * @param[in,out] q        The CL command queue to use for the mapping operation.
+     * @param[in]     blocking If true, then the mapping will be ready to use by the time
+     *                         this method returns, else it is the caller's responsibility
+     *                         to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer.
+     */
+    virtual uint8_t *do_map(cl::CommandQueue &q, bool blocking) = 0;
+    /** Method to be implemented by the child class to unmap the OpenCL buffer
+     *
+     * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before
+     *       the memory is accessed by the device.
+     *
+     * @param[in,out] q The CL command queue to use for the mapping operation.
+     */
+    virtual void do_unmap(cl::CommandQueue &q) = 0;
+
+private:
+    uint8_t *_mapping;
+};
+}
+#endif /*__ARM_COMPUTE_ICLLUT_H__ */
diff --git a/arm_compute/core/CL/ICLMultiHOG.h b/arm_compute/core/CL/ICLMultiHOG.h
new file mode 100644
index 0000000000..9f3c775230
--- /dev/null
+++ b/arm_compute/core/CL/ICLMultiHOG.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_ICLMULTIHOG_H__
+#define __ARM_COMPUTE_ICLMULTIHOG_H__
+
+#include "arm_compute/core/CL/ICLHOG.h"
+#include "arm_compute/core/IMultiHOG.h"
+
+namespace arm_compute
+{
+/** Interface for storing multiple HOG data-objects */
+class ICLMultiHOG : public IMultiHOG
+{
+public:
+    /** Return a pointer to the requested OpenCL HOG model
+     *
+     * @param[in] index The index of the wanted OpenCL HOG model.
+     *
+     * @return A pointer to the requested HOG model
+     */
+    virtual ICLHOG *cl_model(size_t index) = 0;
+    /** Return a constant pointer to the requested OpenCL HOG model
+     *
+     * @param[in] index The index of the wanted OpenCL HOG model.
+     *
+     * @return A constant pointer to the requested OpenCL HOG model
+     */
+    virtual const ICLHOG *cl_model(size_t index) const = 0;
+
+    // Inherited methods overridden:
+    IHOG *model(size_t index) override;
+    const IHOG *model(size_t index) const override;
+};
+}
+#endif /*__ARM_COMPUTE_ICLMULTIHOG_H__ */
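A sketch of iterating the typed accessors above, assuming IMultiHOG exposes num_models() and that the models are allocated:

    void map_all_models(arm_compute::ICLMultiHOG &multi_hog, cl::CommandQueue &queue)
    {
        for(size_t i = 0; i < multi_hog.num_models(); ++i)
        {
            arm_compute::ICLHOG *hog = multi_hog.cl_model(i); // OpenCL view of model i
            hog->map(queue, true);
            float *descriptor = hog->descriptor(); // host pointer, valid while mapped
            // ... host-side work on the descriptor ...
            hog->unmap(queue);
        }
    }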
diff --git a/arm_compute/core/CL/ICLMultiImage.h b/arm_compute/core/CL/ICLMultiImage.h
new file mode 100644
index 0000000000..e8705b1824
--- /dev/null
+++ b/arm_compute/core/CL/ICLMultiImage.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_ICLMULTIIMAGE_H__
+#define __ARM_COMPUTE_ICLMULTIIMAGE_H__
+
+#include "arm_compute/core/IMultiImage.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+using ICLImage = ICLTensor;
+
+/** Interface for OpenCL multi-planar images */
+class ICLMultiImage : public IMultiImage
+{
+public:
+    /** Return a pointer to the requested OpenCL plane of the image.
+     *
+     * @param[in] index The index of the wanted plane.
+     *
+     * @return A pointer to the requested OpenCL plane
+     */
+    virtual ICLImage *cl_plane(unsigned int index) = 0;
+    /** Return a constant pointer to the requested OpenCL plane of the image.
+     *
+     * @param[in] index The index of the wanted plane.
+     *
+     * @return A constant pointer to the requested OpenCL plane
+     */
+    virtual const ICLImage *cl_plane(unsigned int index) const = 0;
+
+    // Inherited methods overridden:
+    IImage *plane(unsigned int index) override;
+    const IImage *plane(unsigned int index) const override;
+};
+}
+#endif /*__ARM_COMPUTE_ICLMULTIIMAGE_H__ */
diff --git a/arm_compute/core/CL/ICLSimple2DKernel.h b/arm_compute/core/CL/ICLSimple2DKernel.h
new file mode 100644
index 0000000000..a1366fb211
--- /dev/null
+++ b/arm_compute/core/CL/ICLSimple2DKernel.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */ +#ifndef __ARM_COMPUTE_ICLSIMPLE2DKERNEL_H__ +#define __ARM_COMPUTE_ICLSIMPLE2DKERNEL_H__ + +#include "arm_compute/core/CL/ICLSimpleKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for simple OpenCL kernels having 1 tensor input and 1 tensor output. This interface can be used when the work-item processes a 2D tile */ +class ICLSimple2DKernel : public ICLSimpleKernel +{ +public: + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; +}; +} +#endif /*__ARM_COMPUTE_ICLSIMPLE2DKERNEL_H__ */ diff --git a/arm_compute/core/CL/ICLSimple3DKernel.h b/arm_compute/core/CL/ICLSimple3DKernel.h new file mode 100644 index 0000000000..5e981027de --- /dev/null +++ b/arm_compute/core/CL/ICLSimple3DKernel.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_ICLSIMPLE3DKERNEL_H__ +#define __ARM_COMPUTE_ICLSIMPLE3DKERNEL_H__ + +#include "arm_compute/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for simple OpenCL kernels having 1 tensor input and 1 tensor output. + * Both input tensor and output tensor must have at least 3 dimensions. + */ +class ICLSimple3DKernel : public ICLSimple2DKernel +{ +public: + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; +}; +} +#endif /*__ARM_COMPUTE_ICLSIMPLE3DKERNEL_H__ */ diff --git a/arm_compute/core/CL/ICLSimpleKernel.h b/arm_compute/core/CL/ICLSimpleKernel.h new file mode 100644 index 0000000000..e9fdb7fb8b --- /dev/null +++ b/arm_compute/core/CL/ICLSimpleKernel.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_ICLSIMPLEKERNEL_H__ +#define __ARM_COMPUTE_ICLSIMPLEKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/core/Helpers.h" + +namespace arm_compute +{ +/** Interface for simple OpenCL kernels having 1 tensor input and 1 tensor output */ +class ICLSimpleKernel : public ICLKernel +{ +public: + /** Constructor. */ + ICLSimpleKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + ICLSimpleKernel(const ICLSimpleKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + ICLSimpleKernel &operator=(const ICLSimpleKernel &) = delete; + /** Allow instances of this class to be moved. */ + ICLSimpleKernel(ICLSimpleKernel &&) = default; + /** Allow instances of this class to be moved. */ + ICLSimpleKernel &operator=(ICLSimpleKernel &&) = default; + /** Default destructor */ + ~ICLSimpleKernel() = default; + + /** Configure the kernel + * + * @param[in] input Source tensor. + * @param[out] output Destination tensor. + * @param[in] num_elems_processed_per_iteration Number of processed elements per iteration. + * @param[in] border_undefined (Optional) True if the border mode is undefined. False if it's replicate or constant. + * @param[in] border_size (Optional) Size of the border. + */ + void configure(const ICLTensor *input, ICLTensor *output, unsigned int num_elems_processed_per_iteration, bool border_undefined = false, const BorderSize &border_size = BorderSize()); + +protected: + const ICLTensor *_input; + ICLTensor *_output; +}; +} + +#endif /*__ARM_COMPUTE_ICLSIMPLEKERNEL_H__ */ diff --git a/arm_compute/core/CL/ICLTensor.h b/arm_compute/core/CL/ICLTensor.h new file mode 100644 index 0000000000..abc0131379 --- /dev/null +++ b/arm_compute/core/CL/ICLTensor.h @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_ICLTENSOR_H__
+#define __ARM_COMPUTE_ICLTENSOR_H__
+
+#include "arm_compute/core/ITensor.h"
+
+#include <cstdint>
+
+namespace cl
+{
+class Buffer;
+class CommandQueue;
+}
+
+namespace arm_compute
+{
+/** Interface for OpenCL tensor */
+class ICLTensor : public ITensor
+{
+public:
+    ICLTensor();
+    ICLTensor(const ICLTensor &) = delete;
+    ICLTensor &operator=(const ICLTensor &) = delete;
+    ICLTensor(ICLTensor &&) = default;
+    ICLTensor &operator=(ICLTensor &&) = default;
+    virtual ~ICLTensor() = default;
+
+    /** Interface to be implemented by the child class to return a reference to the OpenCL buffer containing the image's data.
+     *
+     * @return A reference to an OpenCL buffer containing the image's data.
+     */
+    virtual const cl::Buffer &cl_buffer() const = 0;
+    /** Enqueue a map operation of the allocated buffer on the given queue.
+     *
+     * @param[in,out] q        The CL command queue to use for the mapping operation.
+     * @param[in]     blocking If true, then the mapping will be ready to use by the time
+     *                         this method returns, else it is the caller's responsibility
+     *                         to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer.
+     */
+    void map(cl::CommandQueue &q, bool blocking = true);
+    /** Enqueue an unmap operation of the allocated and mapped buffer on the given queue.
+     *
+     * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before
+     *       the memory is accessed by the device.
+     *
+     * @param[in,out] q The CL command queue to use for the mapping operation.
+     */
+    void unmap(cl::CommandQueue &q);
+    /** Clear the contents of the tensor synchronously.
+     *
+     * @param[in,out] q The CL command queue to use for the clear operation.
+     */
+    void clear(cl::CommandQueue &q);
+
+    // Inherited methods overridden:
+    uint8_t *buffer() const override;
+
+protected:
+    /** Method to be implemented by the child class to map the OpenCL buffer
+     *
+     * @param[in,out] q        The CL command queue to use for the mapping operation.
+     * @param[in]     blocking If true, then the mapping will be ready to use by the time
+     *                         this method returns, else it is the caller's responsibility
+     *                         to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer.
+     */
+    virtual uint8_t *do_map(cl::CommandQueue &q, bool blocking) = 0;
+    /** Method to be implemented by the child class to unmap the OpenCL buffer
+     *
+     * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before
+     *       the memory is accessed by the device.
+     *
+     * @param[in,out] q The CL command queue to use for the mapping operation.
+     */
+    virtual void do_unmap(cl::CommandQueue &q) = 0;
+
+private:
+    uint8_t *_mapping;
+};
+
+using ICLImage = ICLTensor;
+}
+#endif /*__ARM_COMPUTE_ICLTENSOR_H__ */
diff --git a/arm_compute/core/CL/OpenCL.h b/arm_compute/core/CL/OpenCL.h
new file mode 100644
index 0000000000..2fae35c974
--- /dev/null
+++ b/arm_compute/core/CL/OpenCL.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_OPENCL_H__
+#define __ARM_COMPUTE_OPENCL_H__
+
+/* Configure the Khronos C++ wrapper to target OpenCL 1.1: */
+#define CL_HPP_ENABLE_EXCEPTIONS
+#define CL_HPP_CL_1_2_DEFAULT_BUILD
+#define CL_HPP_TARGET_OPENCL_VERSION 110
+#define CL_HPP_MINIMUM_OPENCL_VERSION 110
+#include <CL/cl2.hpp>
+
+namespace cl
+{
+static const NDRange Range_128_1 = NDRange(128, 1);
+}
+
+namespace arm_compute
+{
+bool opencl_is_available();
+}
+#endif /* __ARM_COMPUTE_OPENCL_H__ */
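Since the runtime may be built against a dynamically loaded OpenCL library, opencl_is_available() is the natural guard before any CL setup; a sketch combining it with CLKernelLibrary:

    #include "arm_compute/core/CL/CLKernelLibrary.h"
    #include "arm_compute/core/CL/OpenCL.h"

    bool try_init_cl()
    {
        if(!arm_compute::opencl_is_available())
        {
            return false; // no usable OpenCL runtime on this machine
        }
        arm_compute::CLKernelLibrary::get().init(); // defaults: ".", default context/device
        return true;
    }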
*/ + CLAbsoluteDifferenceKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLAbsoluteDifferenceKernel(const CLAbsoluteDifferenceKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLAbsoluteDifferenceKernel &operator=(const CLAbsoluteDifferenceKernel &) = delete; + /** Allow instances of this class to be moved. */ + CLAbsoluteDifferenceKernel(CLAbsoluteDifferenceKernel &&) = default; + /** Allow instances of this class to be moved. */ + CLAbsoluteDifferenceKernel &operator=(CLAbsoluteDifferenceKernel &&) = default; + /** Default destructor */ + ~CLAbsoluteDifferenceKernel() = default; + + /** Set the inputs and output images. + * + * @param[in] input1 Source tensor. Data types supported: U8/S16. + * @param[in] input2 Source tensor. Data types supported: U8/S16. + * @param[out] output Destination tensor. Data types supported: U8/S16. + */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input1; /**< Source tensor 1. */ + const ICLTensor *_input2; /**< Source tensor 2. */ + ICLTensor *_output; /**< Destination tensor. */ +}; +} +#endif /* __ARM_COMPUTE_CLABSOLUTEDIFFERENCEKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLAccumulateKernel.h b/arm_compute/core/CL/kernels/CLAccumulateKernel.h new file mode 100644 index 0000000000..5c8ffdb404 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLAccumulateKernel.h @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLACCUMULATEKERNEL_H__ +#define __ARM_COMPUTE_CLACCUMULATEKERNEL_H__ + +#include "arm_compute/core/CL/ICLSimple2DKernel.h" + +#include <cstdint> + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the accumulate kernel. + * + * Accumulation is computed by: + * @f[ accum(x,y) = accum(x,y) + input(x,y) @f] + */ +class CLAccumulateKernel : public ICLSimple2DKernel +{ +public: + /** Set the input and accumulation tensors. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] accum Destination tensor. Data types supported: S16. + */ + void configure(const ICLTensor *input, ICLTensor *accum); +}; + +/** Interface for the accumulate weighted kernel.
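For illustration, configuring and launching the absolute-difference kernel could look as follows. This is a sketch: in1, in2 and out are assumed to be already-allocated U8 ICLTensor objects, win a valid execution window for the kernel, and queue a CL command queue (all hypothetical names):

    CLAbsoluteDifferenceKernel absdiff;
    absdiff.configure(&in1, &in2, &out); // out(x,y) = |in1(x,y) - in2(x,y)|
    absdiff.run(win, queue);             // enqueue the kernel on the given queue
    queue.finish();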
+ * + * Weighted accumulation is computed: + * @f[ accum(x,y) = (1 - \alpha)*accum(x,y) + \alpha*input(x,y) @f] + * + * Where @f$ 0 \le \alpha \le 1 @f$ + * Conceptually, the rounding for this is defined as: + * @f[ output(x,y)= uint8( (1 - \alpha) * float32( int32( output(x,y) ) ) + \alpha * float32( int32( input(x,y) ) ) ) @f] +*/ +class CLAccumulateWeightedKernel : public ICLSimple2DKernel +{ +public: + /** Set the input and accumulation images, and the scale value. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[in] alpha Scalar value in the range [0, 1.0]. Data types supported: F32. + * @param[in,out] accum Accumulated tensor. Data types supported: U8. + */ + void configure(const ICLTensor *input, float alpha, ICLTensor *accum); +}; + +/** Interface for the accumulate squared kernel. + * + * The accumulation of squares is computed: + * @f[ accum(x,y) = saturate_{int16} ( (uint16) accum(x,y) + (((uint16)(input(x,y)^2)) >> (shift)) ) @f] + * + * Where @f$ 0 \le shift \le 15 @f$ +*/ +class CLAccumulateSquaredKernel : public ICLSimple2DKernel +{ +public: + /** Set the input and accumulation tensors and the shift value. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[in] shift Shift value in the range of [0, 15]. Data types supported: U32. + * @param[in,out] accum Accumulated tensor. Data types supported: S16. + */ + void configure(const ICLTensor *input, uint32_t shift, ICLTensor *accum); +}; +} +#endif /*__ARM_COMPUTE_CLACCUMULATEKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLActivationLayerKernel.h b/arm_compute/core/CL/kernels/CLActivationLayerKernel.h new file mode 100644 index 0000000000..490e70544b --- /dev/null +++ b/arm_compute/core/CL/kernels/CLActivationLayerKernel.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLACTIVATIONLAYERKERNEL_H__ +#define __ARM_COMPUTE_CLACTIVATIONLAYERKERNEL_H__ + +#include "arm_compute/core/CL/ICLSimple3DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the activation layer kernel. */ +class CLActivationLayerKernel : public ICLSimple3DKernel +{ +public: + /** Set the input and output tensor. + * + * @param[in] input Source tensor. Data types supported: F16, F32, U16, S16. + * @param[out] output Destination tensor. Data type should match the input data type. 
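A quick numeric check of the weighted-accumulation formula above: with alpha = 0.25, accum(x,y) = 200 and input(x,y) = 40, the new value is 0.75 * 200 + 0.25 * 40 = 160. In code (a sketch; tensors assumed allocated and the window/queue obtained as in the earlier examples):

    CLAccumulateWeightedKernel acc_w;
    acc_w.configure(&input, 0.25f, &accum); // accum = 0.75 * accum + 0.25 * input
    acc_w.run(win, queue);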
+ * @param[in] act_info Activation layer information. + */ + void configure(const ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info); +}; +} +#endif /*__ARM_COMPUTE_CLACTIVATIONLAYERKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLArithmeticAdditionKernel.h b/arm_compute/core/CL/kernels/CLArithmeticAdditionKernel.h new file mode 100644 index 0000000000..7d736cdf44 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLArithmeticAdditionKernel.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLARITHMETICADDITIONKERNEL_H__ +#define __ARM_COMPUTE_CLARITHMETICADDITIONKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the arithmetic addition kernel + * + * Arithmetic addition is computed by: + * @f[ output(x,y) = input1(x,y) + input2(x,y) @f] + */ +class CLArithmeticAdditionKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLArithmeticAdditionKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLArithmeticAdditionKernel(const CLArithmeticAdditionKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLArithmeticAdditionKernel &operator=(const CLArithmeticAdditionKernel &) = delete; + /** Allow instances of this class to be moved */ + CLArithmeticAdditionKernel(CLArithmeticAdditionKernel &&) = default; + /** Allow instances of this class to be moved */ + CLArithmeticAdditionKernel &operator=(CLArithmeticAdditionKernel &&) = default; + /** Default destructor */ + ~CLArithmeticAdditionKernel() = default; + /** Initialise the kernel's inputs, output and conversion policy. + * + * @param[in] input1 First tensor input. Data types supported: U8, S16, F16, F32. + * @param[in] input2 Second tensor input. Data types supported: U8, S16, F16, F32. + * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), S16, F16, F32. + * @param[in] policy Policy to use to handle overflow.
+ */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ConvertPolicy policy); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input1; /**< Source tensor 1 */ + const ICLTensor *_input2; /**< Source tensor 2 */ + ICLTensor *_output; /**< Destination tensor */ +}; +} +#endif /* __ARM_COMPUTE_CLARITHMETICADDITIONKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLArithmeticSubtractionKernel.h b/arm_compute/core/CL/kernels/CLArithmeticSubtractionKernel.h new file mode 100644 index 0000000000..afecf6ed7d --- /dev/null +++ b/arm_compute/core/CL/kernels/CLArithmeticSubtractionKernel.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLARITHMETICSUBTRACTIONKERNEL_H__ +#define __ARM_COMPUTE_CLARITHMETICSUBTRACTIONKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the arithmetic subtraction kernel + * + * Arithmetic subtraction is computed by: + * @f[ output(x,y) = input1(x,y) - input2(x,y) @f] + */ +class CLArithmeticSubtractionKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLArithmeticSubtractionKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLArithmeticSubtractionKernel(const CLArithmeticSubtractionKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLArithmeticSubtractionKernel &operator=(const CLArithmeticSubtractionKernel &) = delete; + /** Allow instances of this class to be moved */ + CLArithmeticSubtractionKernel(CLArithmeticSubtractionKernel &&) = default; + /** Allow instances of this class to be moved */ + CLArithmeticSubtractionKernel &operator=(CLArithmeticSubtractionKernel &&) = default; + /** Default destructor */ + ~CLArithmeticSubtractionKernel() = default; + + /** Initialise the kernel's inputs, output and conversion policy. + * + * @param[in] input1 First tensor input. Data types supported: U8, S16, F16, F32. + * @param[in] input2 Second tensor input. Data types supported: U8, S16, F16, F32. + * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), S16, F16, F32. + * @param[in] policy Policy to use to handle overflow.
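The ConvertPolicy parameter in these arithmetic kernels decides what happens when a result does not fit the output type: WRAP keeps the low-order bits, SATURATE clamps to the representable range. For instance, 200 + 100 stored into a U8 output gives 44 under WRAP (300 modulo 256) but 255 under SATURATE. A sketch, with a, b and sum assumed to be allocated U8 tensors:

    CLArithmeticAdditionKernel add;
    add.configure(&a, &b, &sum, ConvertPolicy::SATURATE); // clamp on overflow instead of wrapping
    add.run(win, queue);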
+ */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ConvertPolicy policy); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input1; /**< Source tensor 1 */ + const ICLTensor *_input2; /**< Source tensor 2 */ + ICLTensor *_output; /**< Destination tensor */ +}; +} +#endif /* __ARM_COMPUTE_CLARITHMETICSUBTRACTIONKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h b/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h new file mode 100644 index 0000000000..088853841b --- /dev/null +++ b/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLBATCHNORMALIZATIONLAYERKERNEL_H__ +#define __ARM_COMPUTE_CLBATCHNORMALIZATIONLAYERKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the BatchNormalization layer kernel. + */ +class CLBatchNormalizationLayerKernel : public ICLKernel +{ +public: + /** Constructor */ + CLBatchNormalizationLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLBatchNormalizationLayerKernel(const CLBatchNormalizationLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLBatchNormalizationLayerKernel &operator=(const CLBatchNormalizationLayerKernel &) = delete; + /** Default Move Constructor. */ + CLBatchNormalizationLayerKernel(CLBatchNormalizationLayerKernel &&) = default; + /** Default move assignment operator. */ + CLBatchNormalizationLayerKernel &operator=(CLBatchNormalizationLayerKernel &&) = default; + /** Default destructor */ + ~CLBatchNormalizationLayerKernel() = default; + + /** Set the input and output tensors. + * + * @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, FM]. + * The rest are optional and used for representing batches. Data types supported: F32. + * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input + * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM].
Data types supported: Same as @p input + * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] beta Beta values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] gamma Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] epsilon Small value to avoid division by zero. + */ + void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta, const ICLTensor *gamma, float epsilon); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + const ICLTensor *_mean; + const ICLTensor *_var; + const ICLTensor *_beta; + const ICLTensor *_gamma; + float _epsilon; +}; +} +#endif /*__ARM_COMPUTE_CLBATCHNORMALIZATIONLAYERKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLBitwiseAndKernel.h b/arm_compute/core/CL/kernels/CLBitwiseAndKernel.h new file mode 100644 index 0000000000..624c422abc --- /dev/null +++ b/arm_compute/core/CL/kernels/CLBitwiseAndKernel.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLBITWISEANDKERNEL_H__ +#define __ARM_COMPUTE_CLBITWISEANDKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the bitwise AND operation kernel. + * + * Result is computed by: + * @f[ output(x,y) = input1(x,y) \land input2(x,y) @f] + */ +class CLBitwiseAndKernel : public ICLKernel +{ +public: + /** Default constructor. */ + CLBitwiseAndKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLBitwiseAndKernel(const CLBitwiseAndKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLBitwiseAndKernel &operator=(const CLBitwiseAndKernel &) = delete; + /** Allow instances of this class to be moved */ + CLBitwiseAndKernel(CLBitwiseAndKernel &&) = default; + /** Allow instances of this class to be moved */ + CLBitwiseAndKernel &operator=(CLBitwiseAndKernel &&) = default; + /** Set the inputs and output images + * + * @param[in] input1 Source tensor.
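The header does not spell out the transformation itself, but batch normalization is conventionally defined per feature map as out = gamma * (in - mean) / sqrt(var + epsilon) + beta. Assuming this kernel follows the standard definition, the reference arithmetic for a single element would be:

    #include <cmath>

    // Standard batch normalization of one value, using per-feature-map statistics.
    float batch_normalize(float in, float mean, float var, float beta, float gamma, float epsilon)
    {
        return gamma * (in - mean) / std::sqrt(var + epsilon) + beta;
    }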
Data types supported: U8. + * @param[in] input2 Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: U8. + */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input1; /**< Source tensor 1 */ + const ICLTensor *_input2; /**< Source tensor 2 */ + ICLTensor *_output; /**< Destination tensor */ +}; +} +#endif /* __ARM_COMPUTE_CLBITWISEANDKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLBitwiseNotKernel.h b/arm_compute/core/CL/kernels/CLBitwiseNotKernel.h new file mode 100644 index 0000000000..c9026022e1 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLBitwiseNotKernel.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLBITWISENOTKERNEL_H__ +#define __ARM_COMPUTE_CLBITWISENOTKERNEL_H__ + +#include "arm_compute/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the bitwise NOT operation kernel. + * + * Result is computed by: + * @f[ output(x,y) = \lnot input(x,y) @f] + */ +class CLBitwiseNotKernel : public ICLSimple2DKernel +{ +public: + /** Set the input and output images. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: U8. + */ + void configure(const ICLTensor *input, ICLTensor *output); +}; +} +#endif /* __ARM_COMPUTE_CLBITWISENOTKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLBitwiseOrKernel.h b/arm_compute/core/CL/kernels/CLBitwiseOrKernel.h new file mode 100644 index 0000000000..fe8710fbc1 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLBitwiseOrKernel.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited.
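All four bitwise kernels share the same configure/run shape; e.g. for the AND kernel (a sketch, with U8 tensors assumed allocated as in the earlier examples):

    CLBitwiseAndKernel bitwise_and;
    bitwise_and.configure(&in1, &in2, &out); // out(x,y) = in1(x,y) & in2(x,y)
    bitwise_and.run(win, queue);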
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLBITWISEORKERNEL_H__ +#define __ARM_COMPUTE_CLBITWISEORKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the bitwise OR operation kernel. + * + * Result is computed by: + * @f[ output(x,y) = input1(x,y) \lor input2(x,y) @f] + */ +class CLBitwiseOrKernel : public ICLKernel +{ +public: + /** Default constructor. */ + CLBitwiseOrKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLBitwiseOrKernel(const CLBitwiseOrKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLBitwiseOrKernel &operator=(const CLBitwiseOrKernel &) = delete; + /** Allow instances of this class to be moved */ + CLBitwiseOrKernel(CLBitwiseOrKernel &&) = default; + /** Allow instances of this class to be moved */ + CLBitwiseOrKernel &operator=(CLBitwiseOrKernel &&) = default; + /** Set the inputs and output images + * + * @param[in] input1 Source tensor. Data types supported: U8. + * @param[in] input2 Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: U8. + */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input1; /**< Source tensor 1 */ + const ICLTensor *_input2; /**< Source tensor 2 */ + ICLTensor *_output; /**< Destination tensor */ +}; +} +#endif /* __ARM_COMPUTE_CLBITWISEORKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLBitwiseXorKernel.h b/arm_compute/core/CL/kernels/CLBitwiseXorKernel.h new file mode 100644 index 0000000000..f4e0b4df60 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLBitwiseXorKernel.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLBITWISEXORKERNEL_H__ +#define __ARM_COMPUTE_CLBITWISEXORKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the bitwise XOR operation kernel. + * + * Result is computed by: + * @f[ output(x,y) = input1(x,y) \oplus input2(x,y) @f] + */ +class CLBitwiseXorKernel : public ICLKernel +{ +public: + /** Default constructor. */ + CLBitwiseXorKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLBitwiseXorKernel(const CLBitwiseXorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLBitwiseXorKernel &operator=(const CLBitwiseXorKernel &) = delete; + /** Allow instances of this class to be moved */ + CLBitwiseXorKernel(CLBitwiseXorKernel &&) = default; + /** Allow instances of this class to be moved */ + CLBitwiseXorKernel &operator=(CLBitwiseXorKernel &&) = default; + /** Set the inputs and output images + * + * @param[in] input1 Source tensor. Data types supported: U8. + * @param[in] input2 Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: U8. + */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input1; /**< Source tensor 1 */ + const ICLTensor *_input2; /**< Source tensor 2 */ + ICLTensor *_output; /**< Destination tensor */ +}; +} +#endif /* __ARM_COMPUTE_CLBITWISEXORKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLBox3x3Kernel.h b/arm_compute/core/CL/kernels/CLBox3x3Kernel.h new file mode 100644 index 0000000000..0960f7487a --- /dev/null +++ b/arm_compute/core/CL/kernels/CLBox3x3Kernel.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLBOX3X3KERNEL_H__ +#define __ARM_COMPUTE_CLBOX3X3KERNEL_H__ + +#include "arm_compute/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the box 3x3 filter kernel. + * + */ +class CLBox3x3Kernel : public ICLSimple2DKernel +{ +public: + /** Initialise the kernel's input and output. + * + * @param[in] input An input tensor. Data types supported: U8. + * @param[out] output The output tensor. Data types supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); + + // Inherited methods overridden: + BorderSize border_size() const override; +}; +} +#endif /*__ARM_COMPUTE_CLBOX3X3KERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLCannyEdgeKernel.h b/arm_compute/core/CL/kernels/CLCannyEdgeKernel.h new file mode 100644 index 0000000000..5ca3e03412 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLCannyEdgeKernel.h @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
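The border_undefined flag recurs throughout the filter kernels: true tells the kernel the caller will not read the border pixels that a full 3x3 neighbourhood cannot cover, while false promises the border was filled (replicate or constant) beforehand. A sketch:

    CLBox3x3Kernel box;
    box.configure(&src, &dst, true); // true: border output is left undefined
    // box.border_size() reports how many pixels around the image the kernel reads.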
+ */ +#ifndef __ARM_COMPUTE_CLCANNYEDGEKERNEL_H__ +#define __ARM_COMPUTE_CLCANNYEDGEKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +#include <cstdint> + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to perform Gradient computation. + */ +class CLGradientKernel : public ICLKernel +{ +public: + /** Constructor */ + CLGradientKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLGradientKernel(const CLGradientKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLGradientKernel &operator=(const CLGradientKernel &) = delete; + /** Initialise the kernel's sources, destinations and border mode. + * + * @note gx, gy and mag must all be the same size (either 16 or 32). + * + * @param[in] gx Source tensor - Gx component. Data types supported: S16/S32. + * @param[in] gy Source tensor - Gy component. Data types supported: Same as gx. + * @param[out] magnitude Destination tensor - Magnitude. Data types supported: U16/U32. Must match the pixel size of gx, gy. + * @param[out] phase Destination tensor - Quantized phase. Data types supported: U8. + * @param[in] norm_type Normalization type. If 1, L1-Norm, otherwise L2-Norm. + */ + void configure(const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, int32_t norm_type); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_gx; /**< Source tensor - Gx component */ + const ICLTensor *_gy; /**< Source tensor - Gy component */ + ICLTensor *_magnitude; /**< Destination tensor - Magnitude */ + ICLTensor *_phase; /**< Destination tensor - Quantized phase */ +}; + +/** OpenCL kernel to perform Non-Maxima suppression for Canny Edge. + * + * @note This kernel is meant to be used alongside CannyEdge and performs a non-maxima suppression using magnitude and phase of input + * to characterize points as possible edges. The output buffer needs to be cleared before this kernel is executed. + * + * @note Hysteresis is computed in @ref CLEdgeTraceKernel + */ +class CLEdgeNonMaxSuppressionKernel : public ICLKernel +{ +public: + /** Constructor */ + CLEdgeNonMaxSuppressionKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLEdgeNonMaxSuppressionKernel(const CLEdgeNonMaxSuppressionKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLEdgeNonMaxSuppressionKernel &operator=(const CLEdgeNonMaxSuppressionKernel &) = delete; + /** Initialise the kernel's sources, destination and border mode. + * + * @param[in] magnitude Source tensor - Magnitude. Data types supported: U16/U32. + * @param[in] phase Source tensor - Quantized phase. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: U16/U32. + * @param[in] lower_thr Lower threshold. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *magnitude, const ICLTensor *phase, ICLTensor *output, int32_t lower_thr, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_magnitude; /**< Source tensor - Magnitude. */ + const ICLTensor *_phase; /**< Source tensor - Quantized phase.
*/ + ICLTensor *_output; /**< Destination tensor. */ +}; + +/** OpenCL kernel to perform Edge tracing. + */ +class CLEdgeTraceKernel : public ICLKernel +{ +public: + /** Constructor */ + CLEdgeTraceKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLEdgeTraceKernel(const CLEdgeTraceKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLEdgeTraceKernel &operator=(const CLEdgeTraceKernel &) = delete; + /** Initialise the kernel's source, destination and border mode. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: U8. + * @param[in] upper_thr Upper threshold used for the hysteresis. + * @param[in] lower_thr Lower threshold used for the hysteresis. + * @param[in,out] visited Tensor for keeping the visited pixels. Data types supported: U32. + * Expected to be initialized to 0 before each run. + * @param[in,out] recorded Tensor for keeping the recorded pixels. Data types supported: U32. + * Expected to be initialized to 0 before each run. + * @param[in,out] l1_stack Tensor with the L1 stack for each pixel. Data types supported: S32. + * Expected to be initialized to 0 before each run. + * @param[in,out] l1_stack_counter Tensor for counting the elements in the L1 stack of each pixel. Data types supported: U8. + * Expected to be initialized to 0 before each run. + */ + void configure(const ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr, + ICLTensor *visited, ICLTensor *recorded, ICLTensor *l1_stack, ICLTensor *l1_stack_counter); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; /**< Source tensor. */ + ICLTensor *_output; /**< Destination tensor. */ + int32_t _lower_thr; /**< Lower threshold used for the hysteresis. */ + int32_t _upper_thr; /**< Upper threshold used for the hysteresis. */ + ICLTensor *_visited; /**< Marks visited elements */ + ICLTensor *_recorded; /**< Marks recorded elements */ + ICLTensor *_l1_stack; /**< L1 hysteresis stack */ + ICLTensor *_l1_stack_counter; /**< L1 hysteresis stack counter */ +}; +} +#endif /* __ARM_COMPUTE_CLCANNYEDGEKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLChannelCombineKernel.h b/arm_compute/core/CL/kernels/CLChannelCombineKernel.h new file mode 100644 index 0000000000..3e718a2f1a --- /dev/null +++ b/arm_compute/core/CL/kernels/CLChannelCombineKernel.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
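Putting the three Canny stages together, the intended data flow is gradient -> non-maxima suppression -> edge trace. The sketch below shows only the wiring; tensor types follow each parameter list above, and the bookkeeping tensors must be zero-initialised before every run as noted:

    CLGradientKernel              gradient;
    CLEdgeNonMaxSuppressionKernel nonmax;
    CLEdgeTraceKernel             trace;

    gradient.configure(&gx, &gy, &mag, &phase, 1);                 // 1 selects the L1 norm
    nonmax.configure(&mag, &phase, &suppressed, lower_thr, false); // clear 'suppressed' beforehand
    trace.configure(&suppressed, &edges, upper_thr, lower_thr,
                    &visited, &recorded, &l1_stack, &l1_stack_counter);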
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLCHANNELCOMBINEKERNEL_H__ +#define __ARM_COMPUTE_CLCHANNELCOMBINEKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +#include <array> +#include <cstdint> + +namespace arm_compute +{ +class ICLMultiImage; +class ICLTensor; +using ICLImage = ICLTensor; + +/** Interface for the channel combine kernel */ +class CLChannelCombineKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLChannelCombineKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLChannelCombineKernel(const CLChannelCombineKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLChannelCombineKernel &operator=(const CLChannelCombineKernel &) = delete; + /** Allow instances of this class to be moved */ + CLChannelCombineKernel(CLChannelCombineKernel &&) = default; + /** Allow instances of this class to be moved */ + CLChannelCombineKernel &operator=(CLChannelCombineKernel &&) = default; + /** Default destructor */ + ~CLChannelCombineKernel() = default; + /** Configure function's inputs and outputs. + * + * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format. + * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format. + * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format. + * @param[in] plane3 The 2D plane that forms channel 3. Must be of U8 format. + * @param[out] output The single planar output tensor. + */ + void configure(const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output); + /** Configure function's inputs and outputs. + * + * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format. + * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format. + * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format. + * @param[out] output The multi planar output tensor. + */ + void configure(const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + std::array<const ICLTensor *, 4> _planes; + ICLTensor *_output; + ICLMultiImage *_output_multi; + std::array<uint32_t, 3> _x_subsampling; + std::array<uint32_t, 3> _y_subsampling; +}; +} +#endif /* __ARM_COMPUTE_CLCHANNELCOMBINEKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLChannelExtractKernel.h b/arm_compute/core/CL/kernels/CLChannelExtractKernel.h new file mode 100644 index 0000000000..3e9e699a50 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLChannelExtractKernel.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited.
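For example, recombining three U8 planes into a single interleaved RGB tensor with the first overload might look like this (a sketch; passing nullptr for the unused fourth plane of a three-channel format is an assumption, not something the header states):

    CLChannelCombineKernel combine;
    combine.configure(&r_plane, &g_plane, &b_plane, nullptr, &rgb); // 4th plane assumed unused for RGB
    combine.run(win, queue);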
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLCHANNELEXTRACTKERNEL_H__ +#define __ARM_COMPUTE_CLCHANNELEXTRACTKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/core/Types.h" + +#include <cstdint> + +namespace arm_compute +{ +class ICLMultiImage; +class ICLTensor; +using ICLImage = ICLTensor; + +/** Interface for the channel extract kernel */ +class CLChannelExtractKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLChannelExtractKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLChannelExtractKernel(const CLChannelExtractKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLChannelExtractKernel &operator=(const CLChannelExtractKernel &) = delete; + /** Allow instances of this class to be moved */ + CLChannelExtractKernel(CLChannelExtractKernel &&) = default; + /** Allow instances of this class to be moved */ + CLChannelExtractKernel &operator=(CLChannelExtractKernel &&) = default; + /** Default destructor */ + ~CLChannelExtractKernel() = default; + /** Set the input and output of the kernel + * + * @param[in] input Source tensor. + * @param[in] channel Channel to extract. + * @param[out] output Destination tensor. Must be of U8 format. + */ + void configure(const ICLTensor *input, Channel channel, ICLTensor *output); + /** Set the input and output of the kernel + * + * @param[in] input Multi-planar source image. + * @param[in] channel Channel to extract. + * @param[out] output Single-planar 2D destination image. Must be of U8 format. + */ + void configure(const ICLMultiImage *input, Channel channel, ICLImage *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + uint32_t _num_elems_processed_per_iteration; + uint32_t _subsampling; +}; +} +#endif /* __ARM_COMPUTE_CLCHANNELEXTRACTKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLCol2ImKernel.h b/arm_compute/core/CL/kernels/CLCol2ImKernel.h new file mode 100644 index 0000000000..9d445e3004 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLCol2ImKernel.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLCOL2IMKERNEL_H__ +#define __ARM_COMPUTE_CLCOL2IMKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the col2im reshaping kernel. + * + * Rearranges each matrix column into image blocks. It's the inverse operation of @ref CLIm2ColKernel. + * + * For example, a vector of 9 elements can be reshaped to a block (image) of 3x3: + * + * @f[ + * \left( \begin{array}{ccccccccc} + * a0 & a1 & a2 & a3 & a4 & a5 & a6 & a7 & a8 \\ + * \end{array} \right) + * \rightarrow + * \left( \begin{array}{ccc} + * a0 & a1 & a2 \\ + * a3 & a4 & a5 \\ + * a6 & a7 & a8 \\ + * \end{array} \right) + * @f] + */ +class CLCol2ImKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLCol2ImKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLCol2ImKernel(const CLCol2ImKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLCol2ImKernel &operator=(const CLCol2ImKernel &) = delete; + /** Allow instances of this class to be moved */ + CLCol2ImKernel(CLCol2ImKernel &&) = default; + /** Allow instances of this class to be moved */ + CLCol2ImKernel &operator=(CLCol2ImKernel &&) = default; + /** Default destructor */ + ~CLCol2ImKernel() = default; + + /** Set the input and output of the kernel. + * + * @param[in] input The input tensor to convert. Data types supported: F16, F32 + * @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM], + * while the rest represent batch of outputs. Data types supported: Same as @p input + * @param[in] convolved_dims Output convolved dimensions. + */ + void configure(const ICLTensor *input, ICLTensor *output, std::pair<unsigned int, unsigned int> convolved_dims); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + std::pair<unsigned int, unsigned int> _convolved_dims; +}; +} + +#endif /*__ARM_COMPUTE_CLCOL2IMKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLColorConvertKernel.h b/arm_compute/core/CL/kernels/CLColorConvertKernel.h new file mode 100644 index 0000000000..a88e2dcdf3 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLColorConvertKernel.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited.
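Matching the 9-to-3x3 example in the class comment, the column matrix coming out of a GEMM would be folded back into feature maps like so (a sketch; tensors assumed allocated with compatible shapes):

    CLCol2ImKernel col2im;
    // convolved_dims = { width, height } of the convolved output, here 3x3.
    col2im.configure(&gemm_out, &feature_maps, std::make_pair(3U, 3U));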
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLCOLORCONVERTKERNEL_H__ +#define __ARM_COMPUTE_CLCOLORCONVERTKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLMultiImage; +class ICLTensor; +using ICLImage = ICLTensor; + +/** Interface for the color convert kernel. + * + */ +class CLColorConvertKernel : public ICLKernel +{ +public: + /** Default constructor. */ + CLColorConvertKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLColorConvertKernel(const CLColorConvertKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLColorConvertKernel &operator=(const CLColorConvertKernel &) = delete; + /** Allow instances of this class to be moved */ + CLColorConvertKernel(CLColorConvertKernel &&) = default; + /** Allow instances of this class to be moved */ + CLColorConvertKernel &operator=(CLColorConvertKernel &&) = default; + /** Default destructor. 
*/ + ~CLColorConvertKernel() = default; + + /** Set the input and output of the kernel + * + * @param[in] input Source tensor + * @param[out] output Destination tensor + */ + void configure(const ICLTensor *input, ICLTensor *output); + /** Set the input and output of the kernel + * + * @param[in] input multi-planar source image + * @param[out] output single-planar destination image + */ + void configure(const ICLMultiImage *input, ICLImage *output); + /** Set the input and output of the kernel + * + * @param[in] input single-planar source image + * @param[out] output multi-planar destination image + */ + void configure(const ICLImage *input, ICLMultiImage *output); + /** Set the input and output of the kernel + * + * @param[in] input multi-planar source image + * @param[out] output multi-planar destination image + */ + void configure(const ICLMultiImage *input, ICLMultiImage *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; /**< Pointer to single planar tensor input */ + ICLTensor *_output; /**< Pointer to single planar tensor output */ + const ICLMultiImage *_multi_input; /**< Pointer to multi-planar input */ + ICLMultiImage *_multi_output; /**< Pointer to multi-planar output */ +}; +} + +#endif /* __ARM_COMPUTE_CLCOLORCONVERTKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLConvolutionKernel.h b/arm_compute/core/CL/kernels/CLConvolutionKernel.h new file mode 100644 index 0000000000..9c0908405a --- /dev/null +++ b/arm_compute/core/CL/kernels/CLConvolutionKernel.h @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLCONVOLUTIONKERNEL_H__ +#define __ARM_COMPUTE_CLCONVOLUTIONKERNEL_H__ + +#include "arm_compute/core/CL/ICLSimple2DKernel.h" + +#include <cstdint> + +namespace arm_compute +{ +class ICLTensor; + +/****************************************************************************************\ + * Square Convolution * \****************************************************************************************/ + +/** Interface for the kernel to run an arbitrary size convolution on a tensor. (Currently supports 3x3, 5x5, 7x7 and 9x9). + * The client can supply a convolution matrix \f$ C_{m,n} \f$.
+ * @f{eqnarray}{ + * k_0 &=& \frac{m}{2} \\ + * l_0 &=& \frac{n}{2} \\ + * sum &=& \sum_{k=0,l=0}^{k=m-1,l=n-1} input(x+k-k_0, y+l-l_0) C_{k,l} + * @f} + * + * @note The above equation for this function is similar to the default OpenCV Filter2D function, + * which actually computes a correlation and not a convolution. + * In case of a real convolution the convolution matrix should be flipped both horizontally and vertically. + */ +template <unsigned int matrix_size> +class CLConvolutionKernel : public ICLSimple2DKernel +{ +public: + /** Initialise the kernel's input, output and border mode. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor, Data types supported: U8, S16. + * @param[in] conv Convolution matrix to apply to the input tensor. + * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined); + + // Inherited methods overridden: + BorderSize border_size() const override; +}; + +/** Interface for the kernel which applies a 3x3 convolution to a tensor. */ +using CLConvolution3x3Kernel = CLConvolutionKernel<3>; +/** Interface for the kernel which applies a 5x5 convolution to a tensor. */ +using CLConvolution5x5Kernel = CLConvolutionKernel<5>; +/** Interface for the kernel which applies a 7x7 convolution to a tensor. */ +using CLConvolution7x7Kernel = CLConvolutionKernel<7>; +/** Interface for the kernel which applies a 9x9 convolution to a tensor. */ +using CLConvolution9x9Kernel = CLConvolutionKernel<9>; + +/****************************************************************************************\ + * Separable Square Convolution * \****************************************************************************************/ + +/** Kernel for the Horizontal pass of a Separable Convolution. Currently supports 5x5, 7x7, 9x9 */ +template <unsigned int matrix_size> +class CLSeparableConvolutionHorKernel : public ICLSimple2DKernel +{ +public: + /** Default Constructor */ + CLSeparableConvolutionHorKernel(); + /** Initialise the kernel's input, output and border mode. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor, Data types supported: S16. + * @param[in] conv Convolution matrix to apply to the input tensor. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, bool border_undefined); + + // Inherited methods overridden: + BorderSize border_size() const override; + +private: + BorderSize _border_size; /**< Border size */ +}; + +/** Interface for the kernel which applies a horizontal pass of 5x5 convolution to a tensor. */ +using CLSeparableConvolution5x5HorKernel = CLSeparableConvolutionHorKernel<5>; +/** Interface for the kernel which applies a horizontal pass of 7x7 convolution to a tensor. */ +using CLSeparableConvolution7x7HorKernel = CLSeparableConvolutionHorKernel<7>; +/** Interface for the kernel which applies a horizontal pass of 9x9 convolution to a tensor. */ +using CLSeparableConvolution9x9HorKernel = CLSeparableConvolutionHorKernel<9>; + +/** Kernel for the Vertical pass of a Separable Convolution.
+/** Kernel for the vertical pass of a separable convolution. Currently supports 5x5, 7x7 and 9x9. */
+template <unsigned int matrix_size>
+class CLSeparableConvolutionVertKernel : public ICLSimple2DKernel
+{
+public:
+    /** Initialise the kernel's input, output and border mode.
+     *
+     * @param[in]  input            Source tensor. Data types supported: S16.
+     * @param[out] output           Destination tensor. Data types supported: U8, S16.
+     * @param[in]  conv             Convolution matrix to apply to the input tensor.
+     * @param[in]  scale            Scale of the convolution matrix.
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     * @param[in]  data_type        Data type to use for the intermediate result. @sa data_type_for_convolution
+     */
+    void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined, DataType data_type = DataType::S32);
+
+    // Inherited methods overridden:
+    BorderSize border_size() const override;
+};
+
+/** Interface for the kernel which applies a vertical pass of 5x5 convolution to a tensor. */
+using CLSeparableConvolution5x5VertKernel = CLSeparableConvolutionVertKernel<5>;
+/** Interface for the kernel which applies a vertical pass of 7x7 convolution to a tensor. */
+using CLSeparableConvolution7x7VertKernel = CLSeparableConvolutionVertKernel<7>;
+/** Interface for the kernel which applies a vertical pass of 9x9 convolution to a tensor. */
+using CLSeparableConvolution9x9VertKernel = CLSeparableConvolutionVertKernel<9>;
+
+/****************************************************************************************\
+ *                                 Rectangle Convolution                                *
+\****************************************************************************************/
+
+/** Kernel for running a convolution on a rectangular matrix.
+ *
+ * @note Supports matrix dimensions that are combinations of 3, 5, 7 and 9.
+ */
+class CLConvolutionRectangleKernel : public ICLKernel
+{
+public:
+    /** Default constructor */
+    CLConvolutionRectangleKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLConvolutionRectangleKernel(const CLConvolutionRectangleKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLConvolutionRectangleKernel &operator=(const CLConvolutionRectangleKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    CLConvolutionRectangleKernel(CLConvolutionRectangleKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    CLConvolutionRectangleKernel &operator=(CLConvolutionRectangleKernel &&) = default;
+    /** Initialise the kernel's input, output and border mode.
+     *
+     * @param[in]  input            Source tensor. Data types supported: U8.
+     * @param[out] output           Destination tensor. Data types supported: U8, S16.
+     * @param[in]  conv             Convolution matrix to apply to the input tensor.
+     * @param[in]  width            Width of the convolution matrix (number of columns)
+     * @param[in]  height           Height of the convolution matrix (number of rows)
+     * @param[in]  scale            Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
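+     *
+     * A minimal configuration sketch (illustrative only; it assumes already-allocated CL
+     * tensors src (U8) and dst (S16) and a hypothetical 5x3 averaging matrix):
+     * @code
+     * int16_t coeffs[15] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 };
+     * CLConvolutionRectangleKernel conv;
+     * conv.configure(&src, &dst, coeffs, 5, 3, 0, false); // width 5, height 3, scale 0 -> sum of coefficients (15)
+     * @endcode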
+     */
+    void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined);
+
+    // Inherited methods overridden:
+    void run(const Window &window, cl::CommandQueue &queue) override;
+    BorderSize border_size() const override;
+
+private:
+    BorderSize       _border_size;
+    const ICLTensor *_input;
+    ICLTensor       *_output;
+};
+}
+#endif /*__ARM_COMPUTE_CLCONVOLUTIONKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLDepthConcatenateKernel.h b/arm_compute/core/CL/kernels/CLDepthConcatenateKernel.h
new file mode 100644
index 0000000000..eda4c66883
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLDepthConcatenateKernel.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H__
+#define __ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the depth concatenate kernel.
+ *  The input tensor will be concatenated into the output tensor.
+ */
+class CLDepthConcatenateKernel : public ICLKernel
+{
+public:
+    /** Default constructor */
+    CLDepthConcatenateKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLDepthConcatenateKernel(const CLDepthConcatenateKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLDepthConcatenateKernel &operator=(const CLDepthConcatenateKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    CLDepthConcatenateKernel(CLDepthConcatenateKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    CLDepthConcatenateKernel &operator=(CLDepthConcatenateKernel &&) = default;
+    /** Default destructor */
+    ~CLDepthConcatenateKernel() = default;
+    /** Initialise the kernel's inputs and output
+     *
+     * @param[in]     input        Input tensor. Data types supported: F32.
+     * @param[in]     depth_offset The offset on the Z axis.
+     * @param[in,out] output       Output tensor. Data types supported: F32.
+     *
+     * @note The two lowest dimensions of the output tensor can't be smaller than those of the input tensor.
+     * @note The gap between each of the two lowest dimensions of input and output needs to be divisible by 2.
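+     *
+     * A minimal usage sketch (illustrative only; it assumes already-allocated F32 CL
+     * tensors in0, in1 and out, where out is large enough to hold both inputs along Z):
+     * @code
+     * CLDepthConcatenateKernel concat0;
+     * CLDepthConcatenateKernel concat1;
+     * concat0.configure(&in0, 0, &out);                        // write in0 at depth 0
+     * concat1.configure(&in1, in0.info()->dimension(2), &out); // append in1 after in0
+     * @endcode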
+     *
+     */
+    void configure(const ICLTensor *input, unsigned int depth_offset, ICLTensor *output);
+
+    // Inherited methods overridden:
+    void run(const Window &window, cl::CommandQueue &queue) override;
+    BorderSize border_size() const override;
+
+private:
+    const ICLTensor *_input;
+    ICLTensor       *_output;
+    int              _top_bottom;
+    int              _left_right;
+};
+}
+#endif /* __ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLDepthConvertKernel.h b/arm_compute/core/CL/kernels/CLDepthConvertKernel.h
new file mode 100644
index 0000000000..2c3b1b8b69
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLDepthConvertKernel.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLDEPTHCONVERTKERNEL_H__
+#define __ARM_COMPUTE_CLDEPTHCONVERTKERNEL_H__
+
+#include "arm_compute/core/CL/ICLSimple2DKernel.h"
+#include "arm_compute/core/Types.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the depth conversion kernel. */
+class CLDepthConvertKernel : public ICLSimple2DKernel
+{
+public:
+    /** Set the input and output of the kernel.
+     *
+     * Valid conversions Input -> Output :
+     *
+     *   - U8  -> U16, S16, U32, S32
+     *   - U16 -> U8, U32, S32
+     *   - S16 -> U8, U32, S32
+     *   - U32 -> U8, U16, S16
+     *   - S32 -> U8, U16, S16
+     *
+     * @param[in]  input  The input tensor to convert. Data types supported: U8, U16, S16, U32 or S32.
+     * @param[out] output The output tensor. Data types supported: U8, U16, S16, U32 or S32.
+     * @param[in]  policy Conversion policy
+     * @param[in]  shift  Value for down/up conversions. Must be 0 <= shift < 8.
+     */
+    void configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift);
+};
+}
+
+#endif /*__ARM_COMPUTE_CLDEPTHCONVERTKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLDerivativeKernel.h b/arm_compute/core/CL/kernels/CLDerivativeKernel.h
new file mode 100644
index 0000000000..17552aefbe
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLDerivativeKernel.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLDERIVATIVEKERNEL_H__
+#define __ARM_COMPUTE_CLDERIVATIVEKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the derivative kernel. */
+class CLDerivativeKernel : public ICLKernel
+{
+public:
+    /** Default constructor */
+    CLDerivativeKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    CLDerivativeKernel(const CLDerivativeKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    CLDerivativeKernel &operator=(const CLDerivativeKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    CLDerivativeKernel(CLDerivativeKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    CLDerivativeKernel &operator=(CLDerivativeKernel &&) = default;
+    /** Default destructor */
+    ~CLDerivativeKernel() = default;
+    /** Initialise the kernel's sources, destination and border
+     *
+     * @note At least one of output_x or output_y must be set
+     *
+     * @param[in]  input            Source tensor. Data types supported: U8.
+     * @param[out] output_x         (Optional) Destination tensor for the X gradient. Data types supported: S16.
+     * @param[out] output_y         (Optional) Destination tensor for the Y gradient. Data types supported: S16.
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
+
+    // Inherited methods overridden:
+    void run(const Window &window, cl::CommandQueue &queue) override;
+    BorderSize border_size() const override;
+
+private:
+    const ICLTensor *_input;            /**< Input tensor */
+    ICLTensor       *_output_x;         /**< Output tensor - Derivative along the X direction */
+    ICLTensor       *_output_y;         /**< Output tensor - Derivative along the Y direction */
+    bool             _run_derivative_x; /**< Do we need to run Derivative X? */
+    bool             _run_derivative_y; /**< Do we need to run Derivative Y? */
+};
+}
+#endif /*__ARM_COMPUTE_CLDERIVATIVEKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLDilateKernel.h b/arm_compute/core/CL/kernels/CLDilateKernel.h
new file mode 100644
index 0000000000..a5d3beb02f
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLDilateKernel.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLDILATEKERNEL_H__
+#define __ARM_COMPUTE_CLDILATEKERNEL_H__
+
+#include "arm_compute/core/CL/ICLSimple2DKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the dilate kernel. */
+class CLDilateKernel : public ICLSimple2DKernel
+{
+public:
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  input            An input tensor. Data types supported: U8.
+     * @param[out] output           The output tensor. Data types supported: U8.
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
+
+    // Inherited methods overridden:
+    BorderSize border_size() const override;
+};
+}
+#endif /*__ARM_COMPUTE_CLDILATEKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLErodeKernel.h b/arm_compute/core/CL/kernels/CLErodeKernel.h
new file mode 100644
index 0000000000..a43c925be6
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLErodeKernel.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLERODEKERNEL_H__
+#define __ARM_COMPUTE_CLERODEKERNEL_H__
+
+#include "arm_compute/core/CL/ICLSimple2DKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the erode kernel. */
+class CLErodeKernel : public ICLSimple2DKernel
+{
+public:
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  input            An input tensor. Data types supported: U8.
+     * @param[out] output           The output tensor. Data types supported: U8.
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
+
+    // Inherited methods overridden:
+    BorderSize border_size() const override;
+};
+}
+#endif /*__ARM_COMPUTE_CLERODEKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLFastCornersKernel.h b/arm_compute/core/CL/kernels/CLFastCornersKernel.h
new file mode 100644
index 0000000000..9817b78ae0
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLFastCornersKernel.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLFASTCORNERSKERNEL_H__
+#define __ARM_COMPUTE_CLFASTCORNERSKERNEL_H__
+
+#include "arm_compute/core/CL/ICLArray.h"
+#include "arm_compute/core/CL/ICLKernel.h"
+#include "arm_compute/core/Types.h"
+
+#include <cstdint>
+
+namespace cl
+{
+class Buffer;
+}
+
+namespace arm_compute
+{
+class ICLTensor;
+using ICLImage = ICLTensor;
+
+/** CL kernel to perform fast corners */
+class CLFastCornersKernel : public ICLKernel
+{
+public:
+    /** Default constructor */
+    CLFastCornersKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLFastCornersKernel(const CLFastCornersKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLFastCornersKernel &operator=(const CLFastCornersKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    CLFastCornersKernel(CLFastCornersKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    CLFastCornersKernel &operator=(CLFastCornersKernel &&) = default;
+    /** Default destructor */
+    ~CLFastCornersKernel() = default;
+
+    /** Initialise the kernel.
+     *
+     * @param[in]  input  Source image. Data types supported: U8.
+     * @param[out] output Output image. Data types supported: U8.
+     * @param[in]  threshold           Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3.
+     * @param[in]  non_max_suppression True if non-maxima suppression is applied, false otherwise.
+     * @param[in]  border_mode         Strategy to use for borders.
+     */
+    void configure(const ICLImage *input, ICLImage *output, float threshold, bool non_max_suppression, BorderMode border_mode);
+
+    // Inherited methods overridden
+    void run(const Window &window, cl::CommandQueue &queue) override;
+    BorderSize border_size() const override;
+
+private:
+    const ICLImage *_input;
+    ICLImage       *_output;
+};
+
+/** CL kernel to copy keypoint information to an ICLKeyPointArray and count the number of key points */
+class CLCopyToArrayKernel : public ICLKernel
+{
+public:
+    /** Default constructor */
+    CLCopyToArrayKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLCopyToArrayKernel(const CLCopyToArrayKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLCopyToArrayKernel &operator=(const CLCopyToArrayKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    CLCopyToArrayKernel(CLCopyToArrayKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    CLCopyToArrayKernel &operator=(CLCopyToArrayKernel &&) = default;
+    /** Default destructor */
+    ~CLCopyToArrayKernel() = default;
+
+    /** Initialise the kernel.
+     *
+     * @param[in]  input         Source image. Data types supported: U8.
+     * @param[in]  update_number Flag to indicate whether we need to update the number of corners
+     * @param[out] corners       Array of keypoints to store the results.
+     * @param[out] num_buffers   CL buffer where the number of detected keypoints is stored.
+     */
+    void configure(const ICLImage *input, bool update_number, ICLKeyPointArray *corners, cl::Buffer *num_buffers);
+
+    // Inherited methods overridden:
+    void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+    const ICLImage   *_input;      /**< source image */
+    ICLKeyPointArray *_corners;    /**< destination array */
+    cl::Buffer       *_num_buffer; /**< CL memory to record number of key points in the array */
+};
+}
+#endif /* __ARM_COMPUTE_CLFASTCORNERSKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLFillBorderKernel.h b/arm_compute/core/CL/kernels/CLFillBorderKernel.h
new file mode 100644
index 0000000000..797f86dae8
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLFillBorderKernel.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLFILLBORDERKERNEL_H__
+#define __ARM_COMPUTE_CLFILLBORDERKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for filling the border of a kernel */
+class CLFillBorderKernel : public ICLKernel
+{
+public:
+    /** Default constructor */
+    CLFillBorderKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLFillBorderKernel(const CLFillBorderKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLFillBorderKernel &operator=(const CLFillBorderKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    CLFillBorderKernel(CLFillBorderKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    CLFillBorderKernel &operator=(CLFillBorderKernel &&) = default;
+    /** Default destructor */
+    ~CLFillBorderKernel() = default;
+
+    /** Initialise the kernel's input, output and border mode.
+     *
+     * @param[in,out] tensor                Tensor to process. Data types supported: U8, S16, S32, F32.
+     * @param[in]     border_size           Size of the border to fill in elements.
+     * @param[in]     border_mode           Border mode to use for the convolution.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(ICLTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue());
+
+    /** Function to set the constant value on fill border kernel depending on type.
+     *
+     * @param[in] idx                   Index of the kernel argument to set.
+     * @param[in] constant_border_value Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    template <class T>
+    void set_constant_border(unsigned int idx, const PixelValue &constant_border_value);
+
+    // Inherited methods overridden:
+    void run(const Window &window, cl::CommandQueue &queue) override;
+    bool is_parallelisable() const override;
+
+private:
+    ICLTensor *_tensor;
+};
+}
+#endif /*__ARM_COMPUTE_CLFILLBORDERKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h b/arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h
new file mode 100644
index 0000000000..3ac7b3c4fa
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLGEMMINTERLEAVE4X4KERNEL_H__
+#define __ARM_COMPUTE_CLGEMMINTERLEAVE4X4KERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel which interleaves the elements of a matrix A in chunks of 4x4
+ *
+ * This function puts the values in a 4x4 block of Matrix A on the same row (Interleaved values)
+ *
+ * @f[
+ * \left( \begin{array}{cccc}
+ * a00 & a01 & a02 & a03 \\
+ * a10 & a11 & a12 & a13 \\
+ * a20 & a21 & a22 & a23 \\
+ * a30 & a31 & a32 & a33 \\
+ * \end{array} \right)
+ * \rightarrow
+ * \left( \begin{array}{ccccccccccccccccc}
+ * a00 & a10 & a20 & a30 & a01 & a11 & a21 & a31 & a02 & a12 & a22 & a32 & a03 & a13 & a23 & a33 \\
+ * \end{array} \right)
+ * @f]
+ *
+ * After this operation, the output matrix will have the following shape: [ height * 4, ceil(width / 4.0f) ]
+ */
+class CLGEMMInterleave4x4Kernel : public ICLKernel
+{
+public:
+    /** Default constructor */
+    CLGEMMInterleave4x4Kernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLGEMMInterleave4x4Kernel(const CLGEMMInterleave4x4Kernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLGEMMInterleave4x4Kernel &operator=(const CLGEMMInterleave4x4Kernel &) = delete;
+    /** Allow instances of this class to be moved */
+    CLGEMMInterleave4x4Kernel(CLGEMMInterleave4x4Kernel &&) = default;
+    /** Allow instances of this class to be moved */
+    CLGEMMInterleave4x4Kernel &operator=(CLGEMMInterleave4x4Kernel &&) = default;
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  input  Input tensor. Data types supported: U8/S8/U16/S16/F16/U32/S32/F32
+     * @param[out] output Output tensor. Data type supported: same as @p input
+     */
+    void configure(const ICLTensor *input, ICLTensor *output);
+
+    // Inherited methods overridden
+    void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+    const ICLTensor *_input;
+    ICLTensor       *_output;
+};
+}
+#endif /* __ARM_COMPUTE_CLGEMMINTERLEAVE4X4KERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h
new file mode 100644
index 0000000000..f84d0638da
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYKERNEL_H__
+#define __ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel to compute a low precision matrix multiplication
+ *
+ * This kernel performs the following computation:
+ *  -# Convert the values of matrix A from uint8 to int32 and add a_offset to each of them.
+ *  -# Convert the values of matrix B from uint8 to int32 and add b_offset to each of them.
+ *  -# Compute the int32 matrix product of the resulting a * b.
+ *  -# Add output_offset to each entry of the result.
+ *  -# Multiply each entry of the result by output_mult_int and round to the nearest integer.
+ *  -# Clamp the resulting int32 values to the [0..255] range and cast to uint8.
+ */
+class CLGEMMLowpMatrixMultiplyKernel : public ICLKernel
+{
+public:
+    /** Default Constructor */
+    CLGEMMLowpMatrixMultiplyKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLGEMMLowpMatrixMultiplyKernel(const CLGEMMLowpMatrixMultiplyKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLGEMMLowpMatrixMultiplyKernel &operator=(const CLGEMMLowpMatrixMultiplyKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    CLGEMMLowpMatrixMultiplyKernel(CLGEMMLowpMatrixMultiplyKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    CLGEMMLowpMatrixMultiplyKernel &operator=(CLGEMMLowpMatrixMultiplyKernel &&) = default;
+    /** Initialise the kernel's input and output.
+     *
+     * The input matrices @p input0 and @p input1 must be the output of the kernels @ref CLGEMMInterleave4x4Kernel and @ref CLGEMMTranspose1xWKernel.
+     * These two kernels change the layout of the original matrices to be more cache-friendly.
+     *
+     * @param[in]  input0          Input tensor containing the interleaved Matrix A. Data types supported: U8
+     * @param[in]  input1          Input tensor containing the transposed Matrix B. Data types supported: same as @p input0
+     * @param[out] output          Output tensor to store the result of matrix multiplication. Data types supported: same as @p input0
+     * @param[in]  a_offset        Offset to be added to each element of the matrix A.
+     * @param[in]  b_offset        Offset to be added to each element of the matrix B.
+     * @param[in]  output_offset   Offset to be added to each element of the output matrix
+     * @param[in]  output_mult_int Value to be multiplied with each element of the output matrix
+     * @param[in]  shift           Number of bits to shift right the result.
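+     *
+     * A minimal configuration sketch (illustrative only; it assumes U8 CL tensors
+     * a_interleaved, b_transposed and dst prepared as described above, and hypothetical
+     * quantisation parameters):
+     * @code
+     * CLGEMMLowpMatrixMultiplyKernel mm_lowp;
+     * mm_lowp.configure(&a_interleaved, &b_transposed, &dst, -128, -128, 128, 2, 8);
+     * @endcode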
+     */
+    void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, int32_t a_offset, int32_t b_offset, int32_t output_offset, int32_t output_mult_int, int32_t shift);
+
+    // Inherited methods overridden:
+    void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+    const ICLTensor *_input0;
+    const ICLTensor *_input1;
+    ICLTensor       *_output;
+};
+}
+#endif /*__ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYKERNEL_H__*/
diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h
new file mode 100644
index 0000000000..ea1db9f831
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLGEMMMATRIXACCUMULATEBIASESKERNEL_H__
+#define __ARM_COMPUTE_CLGEMMMATRIXACCUMULATEBIASESKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+/** Interface to add a bias to each row of the input tensor */
+class CLGEMMMatrixAccumulateBiasesKernel : public ICLKernel
+{
+public:
+    /** Default constructor */
+    CLGEMMMatrixAccumulateBiasesKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLGEMMMatrixAccumulateBiasesKernel(const CLGEMMMatrixAccumulateBiasesKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLGEMMMatrixAccumulateBiasesKernel &operator=(const CLGEMMMatrixAccumulateBiasesKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    CLGEMMMatrixAccumulateBiasesKernel(CLGEMMMatrixAccumulateBiasesKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    CLGEMMMatrixAccumulateBiasesKernel &operator=(CLGEMMMatrixAccumulateBiasesKernel &&) = default;
+    /** Set the accumulate buffer and the biases of the kernel.
+     *
+     * @param[in, out] accum  The accumulate tensor to convert. Data types supported: F16/F32
+     * @param[in]      biases The shared biases tensor to append. It must be a 1D tensor. Data types supported: same as @p accum
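+     *
+     * A minimal usage sketch (illustrative only; it assumes already-allocated F32 CL
+     * tensors accum and biases, with biases a 1D tensor matching accum's row length):
+     * @code
+     * CLGEMMMatrixAccumulateBiasesKernel accumulate_biases;
+     * accumulate_biases.configure(&accum, &biases);
+     * @endcode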
+     */
+    void configure(ICLTensor *accum, const ICLTensor *biases);
+
+    // Inherited methods overridden:
+    void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+    ICLTensor       *_accum;
+    const ICLTensor *_biases;
+};
+}
+
+#endif /*__ARM_COMPUTE_CLGEMMMATRIXACCUMULATEBIASESKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixAdditionKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixAdditionKernel.h
new file mode 100644
index 0000000000..c808039567
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLGEMMMatrixAdditionKernel.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLGEMMMATRIXADDITIONKERNEL_H__
+#define __ARM_COMPUTE_CLGEMMMATRIXADDITIONKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel to perform the in-place matrix addition between 2 matrices, taking into account that the second matrix might be weighted by a scalar value beta.
+ *  The matrices must have the same dimensions.
+ *
+ * @note This kernel is computed if and only if beta != 0.0.
+ */
+class CLGEMMMatrixAdditionKernel : public ICLKernel
+{
+public:
+    /** Default constructor */
+    CLGEMMMatrixAdditionKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLGEMMMatrixAdditionKernel(const CLGEMMMatrixAdditionKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLGEMMMatrixAdditionKernel &operator=(const CLGEMMMatrixAdditionKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    CLGEMMMatrixAdditionKernel(CLGEMMMatrixAdditionKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    CLGEMMMatrixAdditionKernel &operator=(CLGEMMMatrixAdditionKernel &&) = default;
+    /** Initialise the kernel's input, output and beta value
+     *
+     * @note The input and output tensors must have the same dimensions
+     *
+     * @param[in]      input  Input tensor (Matrix C). Data types supported: F16/F32
+     * @param[in, out] output Output tensor. If this kernel is used to finalize the GEMM result (alpha * AB + beta * C), output must contain the result obtained by @ref CLGEMMMatrixMultiplyKernel. Data type supported: same as @p input
+     * @param[in]      beta   Weight of matrix C
+     */
+    void configure(const ICLTensor *input, ICLTensor *output, float beta);
+
+    // Inherited methods overridden:
+    void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+    const ICLTensor *_input;
+    ICLTensor       *_output;
+};
+}
+
+#endif /* __ARM_COMPUTE_CLGEMMMATRIXADDITIONKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h
new file mode 100644
index 0000000000..07ea3c12ac
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLGEMMMATRIXMULTIPLYKERNEL_H__
+#define __ARM_COMPUTE_CLGEMMMATRIXMULTIPLYKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel to multiply two input matrices "A" and "B" or to multiply a vector "A" by a matrix "B". All elements of the output matrix/vector will be multiplied by alpha
+ *
+ * @note If the output tensor is a matrix, the implementation assumes that the input tensors @p input0 and @p input1 are both matrices and reshaped respectively with @ref CLGEMMInterleave4x4Kernel and @ref CLGEMMTranspose1xWKernel
+ * @note If the output tensor is a vector and the data type is F32, the implementation assumes that the first input tensor @p input0 is a vector and the second input tensor @p input1 a matrix. The implementation also assumes that both tensors have not been reshaped
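+ *
+ * A minimal configuration sketch (illustrative only; it assumes F32 CL tensors
+ * a_interleaved and b_transposed prepared with the reshape kernels named above, and an
+ * output tensor dst of matching dimensions):
+ * @code
+ * CLGEMMMatrixMultiplyKernel mm;
+ * mm.configure(&a_interleaved, &b_transposed, &dst, 1.0f); // alpha = 1
+ * @endcode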
+ *
+ * @attention The second input tensor must have at least 2 dimensions (matrix)
+ */
+class CLGEMMMatrixMultiplyKernel : public ICLKernel
+{
+public:
+    /** Default constructor */
+    CLGEMMMatrixMultiplyKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLGEMMMatrixMultiplyKernel(const CLGEMMMatrixMultiplyKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLGEMMMatrixMultiplyKernel &operator=(const CLGEMMMatrixMultiplyKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    CLGEMMMatrixMultiplyKernel(CLGEMMMatrixMultiplyKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    CLGEMMMatrixMultiplyKernel &operator=(CLGEMMMatrixMultiplyKernel &&) = default;
+    /** Initialise the kernel's input, output and alpha
+     *
+     * @param[in]  input0 Input tensor containing the interleaved Matrix A or the vector A. Data types supported: F16/F32
+     * @param[in]  input1 Input tensor containing the transposed Matrix B if the first input tensor A is not a vector.
+     *                    If the output tensor is a vector, input1 must contain the matrix B not reshaped. Data type supported: same as @p input0
+     * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0
+     * @param[in]  alpha  Weight of the matrix product
+     */
+    void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, float alpha);
+
+    // Inherited methods overridden:
+    void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+    const ICLTensor *_input0;
+    const ICLTensor *_input1;
+    ICLTensor       *_output;
+};
+}
+#endif /* __ARM_COMPUTE_CLGEMMMATRIXMULTIPLYKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h b/arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h
new file mode 100644
index 0000000000..8d44a4c4fa
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLGEMMTRANSPOSE1XWKERNEL_H__
+#define __ARM_COMPUTE_CLGEMMTRANSPOSE1XWKERNEL_H__
+
+#include "arm_compute/core/CL/ICLSimple2DKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel which transposes the elements of a matrix in chunks of 1x4 if the input data type is F32 or in chunks of 1x8 if the input data type is F16.
+ *
+ * The following is an example of how the transposition 1xW works when the input data type is F32:
+ *
+ * @f[
+ * \left( \begin{array}{cccc}
+ * a00 & a01 & a02 & a03 \\
+ * a10 & a11 & a12 & a13 \\
+ * a20 & a21 & a22 & a23 \\
+ * a30 & a31 & a32 & a33 \\
+ * \end{array} \right)
+ * \rightarrow
+ * \left( \begin{array}{ccccccccccccccccc}
+ * a00 & a01 & a02 & a03 & a10 & a11 & a12 & a13 & a20 & a21 & a22 & a23 & a30 & a31 & a32 & a33 \\
+ * \end{array} \right)
+ * @f]
+ *
+ * The following is an example of how the transposition 1xW works when the input data type is F16:
+ *
+ * @f[
+ * \left( \begin{array}{cccccccc}
+ * a00 & a01 & a02 & a03 & a04 & a05 & a06 & a07 \\
+ * a10 & a11 & a12 & a13 & a14 & a15 & a16 & a17 \\
+ * a20 & a21 & a22 & a23 & a24 & a25 & a26 & a27 \\
+ * a30 & a31 & a32 & a33 & a34 & a35 & a36 & a37 \\
+ * \end{array} \right)
+ * \rightarrow
+ * \left( \begin{array}{cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc}
+ * a00 & a01 & a02 & a03 & a04 & a05 & a06 & a07 & a10 & a11 & a12 & a13 & a14 & a15 & a16 & a17 & a20 & a21 & a22 & a23 & a24 & a25 & a26 & a27 & a30 & a31 & a32 & a33 & a34 & a35 & a36 & a37\\
+ * \end{array} \right)
+ * @f]
+ *
+ * @note If the input data type is F32, the output matrix will have the following shape: [ height * 4, width / 4 ]
+ * @note If the input data type is F16, the output matrix will have the following shape: [ height * 8, width / 8 ]
+ * @note If the input data type is U8, the output matrix will have the following shape: [ height * 16, width / 16 ]
+ *
+ */
+class CLGEMMTranspose1xWKernel : public ICLSimple2DKernel
+{
+public:
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  input  Input tensor. Data types supported: U8/F16/F32
+     * @param[out] output Output tensor. Data type supported: same as @p input
+     */
+    void configure(const ICLTensor *input, ICLTensor *output);
+
+    // Inherited methods overridden:
+    void run(const Window &window, cl::CommandQueue &queue) override;
+};
+}
+#endif /* __ARM_COMPUTE_CLGEMMTRANSPOSE1XWKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h b/arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h
new file mode 100644
index 0000000000..028a10b421
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLGAUSSIAN3X3KERNEL_H__ +#define __ARM_COMPUTE_CLGAUSSIAN3X3KERNEL_H__ + +#include "arm_compute/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the Gaussian 3x3 filter kernel. + * + */ +class CLGaussian3x3Kernel : public ICLSimple2DKernel +{ +public: + /** Initialise the kernel's input and output. + * + * @param[in] input An input tensor. Data types supported: U8 + * @param[out] output The output tensor. Data types supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); + + // Inherited methods overridden: + BorderSize border_size() const override; +}; +} +#endif /*__ARM_COMPUTE_CLGAUSSIAN3X3KERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h b/arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h new file mode 100644 index 0000000000..1484c06311 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLGAUSSIAN5X5KERNEL_H__ +#define __ARM_COMPUTE_CLGAUSSIAN5X5KERNEL_H__ + +#include "arm_compute/core/CL/kernels/CLConvolutionKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the kernel to run the horizontal pass of 5x5 Gaussian filter on a tensor. */ +class CLGaussian5x5HorKernel : public CLSeparableConvolution5x5HorKernel +{ +public: + /** Initialise the kernel's source, destination and border. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
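+     *
+     * A minimal usage sketch (illustrative only; it assumes already-allocated CL tensors
+     * src (U8) and tmp (S16); this shows only the horizontal pass, and the vertical kernel
+     * declared below then consumes tmp):
+     * @code
+     * CLGaussian5x5HorKernel gauss_hor;
+     * gauss_hor.configure(&src, &tmp, false);
+     * @endcode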
+     */
+    void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
+
+private:
+    // Make the configure method of the parent class private
+    using CLSeparableConvolution5x5HorKernel::configure;
+};
+
+/** Interface for the kernel to run the vertical pass of 5x5 Gaussian filter on a tensor. */
+class CLGaussian5x5VertKernel : public CLSeparableConvolution5x5VertKernel
+{
+public:
+    /** Initialise the kernel's source, destination and border.
+     *
+     * @param[in]  input            Input tensor (output of the horizontal pass). Data types supported: S16.
+     * @param[out] output           Destination tensor. Data types supported: U8.
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
+
+private:
+    // Make the configure method of the parent class private
+    using CLSeparableConvolution5x5VertKernel::configure;
+};
+}
+#endif /*__ARM_COMPUTE_CLGAUSSIAN5X5KERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h b/arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h
new file mode 100644
index 0000000000..6d79d0e718
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */ +#ifndef __ARM_COMPUTE_CLGAUSSIANPYRAMIDKERNEL_H__ +#define __ARM_COMPUTE_CLGAUSSIANPYRAMIDKERNEL_H__ + +#include "arm_compute/core/CL/ICLSimpleKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to perform a Gaussian filter and half scaling across width (horizontal pass) */ +class CLGaussianPyramidHorKernel : public ICLSimpleKernel +{ +public: + /** Default constructor */ + CLGaussianPyramidHorKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGaussianPyramidHorKernel(const CLGaussianPyramidHorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGaussianPyramidHorKernel &operator=(const CLGaussianPyramidHorKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGaussianPyramidHorKernel(CLGaussianPyramidHorKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGaussianPyramidHorKernel &operator=(CLGaussianPyramidHorKernel &&) = default; + /** Default destructor */ + ~CLGaussianPyramidHorKernel() = default; + + /** Initialise the kernel's source, destination and border mode. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Output should have half the input width. Data types supported: U16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + BorderSize _border_size; + int _l2_load_offset; +}; + +/** OpenCL kernel to perform a Gaussian filter and half scaling across height (vertical pass) */ +class CLGaussianPyramidVertKernel : public ICLSimpleKernel +{ +public: + /** Default constructor */ + CLGaussianPyramidVertKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGaussianPyramidVertKernel(const CLGaussianPyramidVertKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGaussianPyramidVertKernel &operator=(const CLGaussianPyramidVertKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGaussianPyramidVertKernel(CLGaussianPyramidVertKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGaussianPyramidVertKernel &operator=(CLGaussianPyramidVertKernel &&) = default; + /** Default destructor */ + ~CLGaussianPyramidVertKernel() = default; + + /** Initialise the kernel's source, destination and border mode. + * + * @param[in] input Source tensor. Data types supported: U16. + * @param[out] output Destination tensor. Output should have half the input height. Data types supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
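+     *
+     * A minimal usage sketch (illustrative only; it assumes already-allocated CL tensors
+     * tmp (U16, output of the horizontal pass) and dst (U8, half the height of tmp)):
+     * @code
+     * CLGaussianPyramidVertKernel pyramid_vert;
+     * pyramid_vert.configure(&tmp, &dst, false);
+     * @endcode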
+     */
+    void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
+
+    // Inherited methods overridden:
+    void run(const Window &window, cl::CommandQueue &queue) override;
+    BorderSize border_size() const override;
+
+private:
+    int _t2_load_offset;
+};
+}
+#endif /*__ARM_COMPUTE_CLGAUSSIANPYRAMIDKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h b/arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h
new file mode 100644
index 0000000000..45a5aac1bc
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLHOGDESCRIPTORKERNEL_H__
+#define __ARM_COMPUTE_CLHOGDESCRIPTORKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+#include "arm_compute/core/IHOG.h"
+#include "arm_compute/core/Size2D.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** OpenCL kernel to perform HOG Orientation Binning */
+class CLHOGOrientationBinningKernel : public ICLKernel
+{
+public:
+    /** Default constructor */
+    CLHOGOrientationBinningKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLHOGOrientationBinningKernel(const CLHOGOrientationBinningKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLHOGOrientationBinningKernel &operator=(const CLHOGOrientationBinningKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    CLHOGOrientationBinningKernel(CLHOGOrientationBinningKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    CLHOGOrientationBinningKernel &operator=(CLHOGOrientationBinningKernel &&) = default;
+    /** Default destructor */
+    ~CLHOGOrientationBinningKernel() = default;
+
+    /** Initialise the kernel's inputs, output and HOG's metadata
+     *
+     * @param[in]  input_magnitude Input tensor which stores the magnitude of the gradient for each pixel. Data type supported: S16.
+     * @param[in]  input_phase     Input tensor which stores the phase of the gradient for each pixel. Data type supported: U8.
+     * @param[out] output          Output tensor which stores the local HOG for each cell. Data type supported: F32.
Number of channels supported: equal to the number of histogram bins per cell + * @param[in] hog_info HOG's metadata + */ + void configure(const ICLTensor *input_magnitude, const ICLTensor *input_phase, ICLTensor *output, const HOGInfo *hog_info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input_magnitude; + const ICLTensor *_input_phase; + ICLTensor *_output; + Size2D _cell_size; +}; + +/** OpenCL kernel to perform HOG block normalization */ +class CLHOGBlockNormalizationKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLHOGBlockNormalizationKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHOGBlockNormalizationKernel(const CLHOGBlockNormalizationKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHOGBlockNormalizationKernel &operator=(const CLHOGBlockNormalizationKernel &) = delete; + /** Allow instances of this class to be moved */ + CLHOGBlockNormalizationKernel(CLHOGBlockNormalizationKernel &&) = default; + /** Allow instances of this class to be moved */ + CLHOGBlockNormalizationKernel &operator=(CLHOGBlockNormalizationKernel &&) = default; + /** Default destructor */ + ~CLHOGBlockNormalizationKernel() = default; + + /** Initialise the kernel's input, output and HOG's metadata + * + * @param[in] input Input tensor which stores the local HOG for each cell. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per cell + * @param[out] output Output tensor which stores the normalised blocks. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block + * @param[in] hog_info HOG's metadata + */ + void configure(const ICLTensor *input, ICLTensor *output, const HOGInfo *hog_info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + Size2D _num_cells_per_block_stride; +}; +} +#endif /* __ARM_COMPUTE_CLHOGDESCRIPTORKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLHOGDetectorKernel.h b/arm_compute/core/CL/kernels/CLHOGDetectorKernel.h new file mode 100644 index 0000000000..47bd0549ee --- /dev/null +++ b/arm_compute/core/CL/kernels/CLHOGDetectorKernel.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLHOGDETECTORKERNEL_H__
+#define __ARM_COMPUTE_CLHOGDETECTORKERNEL_H__
+
+#include "arm_compute/core/CL/ICLArray.h"
+#include "arm_compute/core/CL/ICLHOG.h"
+#include "arm_compute/core/CL/ICLKernel.h"
+#include "arm_compute/core/CL/OpenCL.h"
+
+namespace cl
+{
+class Buffer;
+}
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel to perform HOG detection using linear SVM */
+class CLHOGDetectorKernel : public ICLKernel
+{
+public:
+    /** Default constructor */
+    CLHOGDetectorKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLHOGDetectorKernel(const CLHOGDetectorKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLHOGDetectorKernel &operator=(const CLHOGDetectorKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    CLHOGDetectorKernel(CLHOGDetectorKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    CLHOGDetectorKernel &operator=(CLHOGDetectorKernel &&) = default;
+    /** Default destructor */
+    ~CLHOGDetectorKernel() = default;
+
+    /** Initialise the kernel's input, HOG data-object, detection window, the stride of the detection window, the threshold and index of the object to detect
+     *
+     * @param[in]  input                   Input tensor which stores the HOG descriptor obtained with @ref CLHOGOrientationBinningKernel. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block
+     * @param[in]  hog                     HOG data object used by @ref CLHOGOrientationBinningKernel and @ref CLHOGBlockNormalizationKernel
+     * @param[out] detection_windows       Array of @ref DetectionWindow. This array stores all the detected objects
+     * @param[out] num_detection_windows   Number of detected objects
+     * @param[in]  detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions.
+     *                                     It must be a multiple of the hog->info()->block_stride()
+     * @param[in]  threshold               (Optional) Threshold for the distance between features and SVM classifying plane
+     * @param[in]  idx_class               (Optional) Index of the class used for evaluating which class the detection window belongs to
+     */
+    void configure(const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, cl::Buffer *num_detection_windows, const Size2D &detection_window_stride, float threshold = 0.0f,
+                   uint16_t idx_class = 0);
+
+    // Inherited methods overridden:
+    void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+    const ICLTensor         *_input;
+    ICLDetectionWindowArray *_detection_windows;
+    cl::Buffer              *_num_detection_windows;
+};
+}
+
+#endif /* __ARM_COMPUTE_CLHOGDETECTORKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLHarrisCornersKernel.h b/arm_compute/core/CL/kernels/CLHarrisCornersKernel.h
new file mode 100644
index 0000000000..d8057df8d1
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLHarrisCornersKernel.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLHARRISCORNERSKERNEL_H__
+#define __ARM_COMPUTE_CLHARRISCORNERSKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+using ICLImage = ICLTensor;
+
+/** Interface for the Harris score kernel.
+ *
+ * @note The implementation supports 3, 5, and 7 for the block_size.
+ */
+class CLHarrisScoreKernel : public ICLKernel
+{
+public:
+    /** Default constructor */
+    CLHarrisScoreKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLHarrisScoreKernel(const CLHarrisScoreKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLHarrisScoreKernel &operator=(const CLHarrisScoreKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    CLHarrisScoreKernel(CLHarrisScoreKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    CLHarrisScoreKernel &operator=(CLHarrisScoreKernel &&) = default;
+    /** Default destructor */
+    ~CLHarrisScoreKernel() = default;
+
+    /** Setup the kernel parameters
+     *
+     * @param[in]  input1           Source image (gradient X). Data types supported: S16, S32. (Must be the same as input2)
+     * @param[in]  input2           Source image (gradient Y). Data types supported: S16, S32. (Must be the same as input1)
+     * @param[out] output           Destination image (Harris score). Data types supported: F32
+     * @param[in]  block_size       The block window size used to compute the Harris Corner score. Supports: 3, 5 and 7
+     * @param[in]  norm_factor      Normalization factor to use accordingly with the gradient size (Must be different from 0)
+     * @param[in]  strength_thresh  Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel).
+     * @param[in]  sensitivity      Sensitivity threshold k from the Harris-Stephens equation.
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
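+     *
+     * Illustrative configuration sketch (the gx/gy gradient images, the score
+     * image and the numeric arguments are assumptions, not prescribed values):
+     * @code
+     * CLHarrisScoreKernel harris_score;
+     * // gx/gy: S16 gradient images; score: F32; norm_factor and strength_thresh are illustrative floats
+     * harris_score.configure(&gx, &gy, &score, 3, norm_factor, strength_thresh, 0.04f, false);
+     * @endcode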
+     */
+    void configure(const ICLImage *input1, const ICLImage *input2, ICLImage *output,
+                   int32_t block_size, float norm_factor, float strength_thresh, float sensitivity,
+                   bool border_undefined);
+
+    // Inherited methods overridden:
+    void run(const Window &window, cl::CommandQueue &queue) override;
+    BorderSize border_size() const override;
+
+protected:
+    const ICLImage *_input1;          /**< Source image - Gx component */
+    const ICLImage *_input2;          /**< Source image - Gy component */
+    ICLImage       *_output;          /**< Output image - Harris score */
+    float           _sensitivity;     /**< Sensitivity value */
+    float           _strength_thresh; /**< Threshold value */
+    float           _norm_factor;     /**< Normalization factor */
+    BorderSize      _border_size;     /**< Border size */
+};
+}
+#endif /* __ARM_COMPUTE_CLHARRISCORNERSKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLHistogramKernel.h b/arm_compute/core/CL/kernels/CLHistogramKernel.h
new file mode 100644
index 0000000000..b65e62d9a2
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLHistogramKernel.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLHISTOGRAMKERNEL_H__
+#define __ARM_COMPUTE_CLHISTOGRAMKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLDistribution1D;
+class ICLTensor;
+using ICLImage = ICLTensor;
+
+/** Interface to run the histogram kernel. This kernel processes the part of the image whose width is a multiple of 16.
+ * If the image width is not a multiple of 16, the remaining pixels have to be processed with the @ref CLHistogramBorderKernel
+ */
+class CLHistogramKernel : public ICLKernel
+{
+public:
+    /** Constructor */
+    CLHistogramKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLHistogramKernel(const CLHistogramKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLHistogramKernel &operator=(const CLHistogramKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    CLHistogramKernel(CLHistogramKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    CLHistogramKernel &operator=(CLHistogramKernel &&) = default;
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  input  Source image. Data types supported: U8.
+     * @param[out] output Destination distribution.
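+     *
+     * Illustrative sketch covering the full image (the image and distribution
+     * objects are assumptions, created and allocated by the caller):
+     * @code
+     * CLHistogramKernel       hist;
+     * CLHistogramBorderKernel hist_border; // processes the width % 16 leftover pixels
+     * hist.configure(&image, &distribution);
+     * hist_border.configure(&image, &distribution);
+     * @endcode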
+     */
+    void configure(const ICLImage *input, ICLDistribution1D *output);
+
+    // Inherited methods overridden:
+    void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+    const ICLImage    *_input;
+    ICLDistribution1D *_output;
+};
+
+/** Interface to run the histogram kernel to handle the leftover part of the image
+ *
+ */
+class CLHistogramBorderKernel : public ICLKernel
+{
+public:
+    /** Constructor */
+    CLHistogramBorderKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLHistogramBorderKernel(const CLHistogramBorderKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLHistogramBorderKernel &operator=(const CLHistogramBorderKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    CLHistogramBorderKernel(CLHistogramBorderKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    CLHistogramBorderKernel &operator=(CLHistogramBorderKernel &&) = default;
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  input  Source image. Data types supported: U8.
+     * @param[out] output Destination distribution.
+     */
+    void configure(const ICLImage *input, ICLDistribution1D *output);
+
+    // Inherited methods overridden:
+    void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+    const ICLImage    *_input;
+    ICLDistribution1D *_output;
+};
+}
+
+#endif /* __ARM_COMPUTE_CLHISTOGRAMKERNEL_H__*/
diff --git a/arm_compute/core/CL/kernels/CLIm2ColKernel.h b/arm_compute/core/CL/kernels/CLIm2ColKernel.h
new file mode 100644
index 0000000000..d2224b53e1
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLIm2ColKernel.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLIM2COLKERNEL_H__
+#define __ARM_COMPUTE_CLIM2COLKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the im2col reshape kernel.
+ *
+ * Rearranges image blocks into columns. It is used to strip out each convolution block to a single column.
+ * It is used to transform a convolution to a plain matrix multiplication.
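+ *
+ * A typical configure() call looks as follows (a sketch only: the tensor names,
+ * shapes and convolved output dimensions are assumptions); the underlying block
+ * rearrangement is illustrated by the example right after this snippet:
+ * @code
+ * CLIm2ColKernel im2col;
+ * // input: [W, H, IFM]; out_w/out_h: unsigned int convolved dims; 3x3 kernel, stride 1, no padding, no bias
+ * im2col.configure(&input, &output, std::make_pair(out_w, out_h), PadStrideInfo(1, 1, 0, 0), false);
+ * @endcode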
+ *
+ * For example, taking into account the image below and assuming 3x3 image blocks with a stride of 1, we have:
+ * @f[
+ * \left( \begin{array}{cccc}
+ * a00 & a01 & a02 & a03 \\
+ * a10 & a11 & a12 & a13 \\
+ * a20 & a21 & a22 & a23 \\
+ * a30 & a31 & a32 & a33 \\
+ * \end{array} \right)
+ * =
+ * \left( \begin{array}{ccccccccc}
+ * a00 & a01 & a02 & a10 & a11 & a12 & a20 & a21 & a22 \\
+ * a01 & a02 & a03 & a11 & a12 & a13 & a21 & a22 & a23 \\
+ * a10 & a11 & a12 & a20 & a21 & a22 & a30 & a31 & a32 \\
+ * a11 & a12 & a13 & a21 & a22 & a23 & a31 & a32 & a33 \\
+ * \end{array} \right)
+ * @f]
+ */
+class CLIm2ColKernel : public ICLKernel
+{
+public:
+    /** Default constructor */
+    CLIm2ColKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLIm2ColKernel(const CLIm2ColKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLIm2ColKernel &operator=(const CLIm2ColKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    CLIm2ColKernel(CLIm2ColKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    CLIm2ColKernel &operator=(CLIm2ColKernel &&) = default;
+    /** Set the input and output of the kernel.
+     *
+     * @param[in]  input          The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
+     *                            while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F16, F32
+     * @param[out] output         The output tensor. First 2 lower dimensions represent a transform of each 3D input,
+     *                            while every dimension above represents a batch. Data types supported: Same as @p input
+     * @param[in]  convolved_dims The convolved output dimensions.
+     * @param[in]  conv_info      Contains padding and stride information described in @ref PadStrideInfo.
+     * @param[in]  has_bias       If biases are provided, expands each output column with an additional element set to 1.
+     */
+    void configure(const ICLTensor *input, ICLTensor *output, std::pair<unsigned int, unsigned int> convolved_dims, const PadStrideInfo &conv_info, bool has_bias);
+
+    // Inherited methods overridden:
+    void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+    /** Run the reshape kernel optimised for the special case (stride is 1, padding is 0 and the kernel's low 3 dimensions are the same as the input's)
+     *
+     * @param[in]     window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
+     * @param[in,out] queue  Command queue on which to enqueue the kernel.
+     */
+    void run_reduced(const Window &window, cl::CommandQueue &queue);
+    /** Run the generic convolution layer input reshape kernel
+     *
+     * @param[in]     window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
+     * @param[in,out] queue  Command queue on which to enqueue the kernel.
+     */
+    void run_generic(const Window &window, cl::CommandQueue &queue);
+
+    /** Common signature for the kernel to run */
+    using Im2ColFunction = void (CLIm2ColKernel::*)(const Window &, cl::CommandQueue &);
+
+private:
+    const ICLTensor *_input;
+    ICLTensor       *_output;
+    std::pair<unsigned int, unsigned int> _convolved_dims;
+    PadStrideInfo   _conv_info;
+    int             _kernel_size;
+    unsigned int    _num_elems_processed_per_iteration;
+    Im2ColFunction  _run_func;
+};
+}
+
+#endif /*__ARM_COMPUTE_CLIM2COLKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLIntegralImageKernel.h b/arm_compute/core/CL/kernels/CLIntegralImageKernel.h
new file mode 100644
index 0000000000..0f53c2d2a8
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLIntegralImageKernel.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLINTEGRALIMAGEKERNEL_H__
+#define __ARM_COMPUTE_CLINTEGRALIMAGEKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+#include "arm_compute/core/CL/ICLSimple2DKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface to run the horizontal pass of the integral image kernel. */
+class CLIntegralImageHorKernel : public ICLSimple2DKernel
+{
+public:
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  input  An input tensor. Data types supported: U8
+     * @param[out] output Destination tensor. Data types supported: U32.
+     */
+    void configure(const ICLTensor *input, ICLTensor *output);
+};
+
+/** Interface to run the vertical pass of the integral image kernel. */
+class CLIntegralImageVertKernel : public ICLKernel
+{
+public:
+    /** Default constructor */
+    CLIntegralImageVertKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    CLIntegralImageVertKernel(const CLIntegralImageVertKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    CLIntegralImageVertKernel &operator=(const CLIntegralImageVertKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    CLIntegralImageVertKernel(CLIntegralImageVertKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    CLIntegralImageVertKernel &operator=(CLIntegralImageVertKernel &&) = default;
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in,out] in_out The input/output tensor. Data types supported: U32
+     */
+    void configure(ICLTensor *in_out);
+
+    // Inherited methods overridden:
+    void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+    ICLTensor *_in_out;
+};
+}
+#endif /*__ARM_COMPUTE_CLINTEGRALIMAGEKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLLKTrackerKernel.h b/arm_compute/core/CL/kernels/CLLKTrackerKernel.h
new file mode 100644
index 0000000000..4d0dbed55d
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLLKTrackerKernel.h
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLLKTRACKERKERNEL_H__
+#define __ARM_COMPUTE_CLLKTRACKERKERNEL_H__
+
+#include "arm_compute/core/CL/ICLArray.h"
+#include "arm_compute/core/CL/ICLKernel.h"
+#include "arm_compute/core/Types.h"
+
+#include <cstddef>
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Internal keypoint structure for Lucas-Kanade Optical Flow */
+struct CLLKInternalKeypoint
+{
+    float x{ 0.f };               /**< x coordinate of the keypoint */
+    float y{ 0.f };               /**< y coordinate of the keypoint */
+    float tracking_status{ 0.f }; /**< the tracking status of the keypoint */
+    float dummy{ 0.f };           /**< Dummy field, to make sure the data structure is 128-bit aligned, so that the GPU can use vload4 */
+};
+
+/** Structure for storing Spatial Gradient Matrix and the minimum eigenvalue for each keypoint */
+struct CLCoefficientTable
+{
+    float A11;     /**< iA11 * FLT_SCALE */
+    float A12;     /**< iA12 * FLT_SCALE */
+    float A22;     /**< iA22 * FLT_SCALE */
+    float min_eig; /**< Minimum eigenvalue */
+};
+
+/** Structure for storing ival, ixval and iyval for each point inside the window */
+struct CLOldValue
+{
+    int16_t ival;  /**< ival extracted from the old image */
+    int16_t ixval; /**< ixval extracted from the Scharr Gx image */
+    int16_t iyval; /**< iyval extracted from the Scharr Gy image */
+    int16_t dummy; /**< Dummy field, to make sure the data structure is 128-bit aligned, so that the GPU can use vload4 */
+};
+
+using ICLLKInternalKeypointArray = ICLArray<CLLKInternalKeypoint>;
+using ICLCoefficientTableArray   = ICLArray<CLCoefficientTable>;
+using ICLOldValArray             = ICLArray<CLOldValue>;
+
+/** Interface to run the initialization step of LKTracker */
+class CLLKTrackerInitKernel : public ICLKernel
+{
+public:
+    /** Initialise the kernel input and output
+     *
+     * @param[in]  old_points           Pointer to the @ref ICLKeyPointArray storing old key points
+     * @param[in]  new_points_estimates Pointer to the @ref ICLKeyPointArray storing the new estimated
key points + * @param[out] old_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint old points + * @param[out] new_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint new points + * @param[in] use_initial_estimate The flag to indicate whether the initial estimated position should be used + * @param[in] level The pyramid level + * @param[in] num_levels The number of pyramid levels + * @param[in] pyramid_scale Scale factor used for generating the pyramid + */ + void configure(const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates, + ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal, + bool use_initial_estimate, size_t level, size_t num_levels, float pyramid_scale); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; +}; + +/** Interface to run the finalize step of LKTracker, where it truncates the coordinates stored in new_points array */ +class CLLKTrackerFinalizeKernel : public ICLKernel +{ +public: + /** Initialise the kernel input and output + * + * @param[in] new_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint new points + * @param[out] new_points Pointer to the @ref ICLKeyPointArray storing new key points + */ + void configure(ICLLKInternalKeypointArray *new_points_internal, ICLKeyPointArray *new_points); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; +}; + +/** Interface to run the first stage of LKTracker, where A11, A12, A22, min_eig, ival, ixval and iyval are computed */ +class CLLKTrackerStage0Kernel : public ICLKernel +{ +public: + /** Default constructor */ + CLLKTrackerStage0Kernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLLKTrackerStage0Kernel(const CLLKTrackerStage0Kernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLLKTrackerStage0Kernel &operator=(const CLLKTrackerStage0Kernel &) = delete; + /** Allow instances of this class to be moved */ + CLLKTrackerStage0Kernel(CLLKTrackerStage0Kernel &&) = default; + /** Allow instances of this class to be moved */ + CLLKTrackerStage0Kernel &operator=(CLLKTrackerStage0Kernel &&) = default; + /** Initialise the kernel input and output + * + * @param[in] old_input Pointer to the input old tensor. Data types supported: U8 + * @param[in] old_scharr_gx Pointer to the input scharr X tensor. Data types supported: S16 + * @param[in] old_scharr_gy Pointer to the input scharr Y tensor. 
Data types supported: S16 + * @param[in] old_points_internal Pointer to the array of CLLKInternalKeypoint old points + * @param[in, out] new_points_internal Pointer to the array of CLLKInternalKeypoint new points + * @param[out] coeff_table Pointer to the array holding the Spatial Gradient coefficients + * @param[out] old_ival Pointer to the array holding internal values + * @param[in] window_dimension The size of the window on which to perform the algorithm + * @param[in] level The pyramid level + */ + void configure(const ICLTensor *old_input, const ICLTensor *old_scharr_gx, const ICLTensor *old_scharr_gy, + ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal, + ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival, + size_t window_dimension, size_t level); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_old_input; + const ICLTensor *_old_scharr_gx; + const ICLTensor *_old_scharr_gy; +}; + +/** Interface to run the second stage of LKTracker, where the motion vectors of the given points are computed */ +class CLLKTrackerStage1Kernel : public ICLKernel +{ +public: + /** Default constructor */ + CLLKTrackerStage1Kernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLLKTrackerStage1Kernel(const CLLKTrackerStage1Kernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLLKTrackerStage1Kernel &operator=(const CLLKTrackerStage1Kernel &) = delete; + /** Allow instances of this class to be moved */ + CLLKTrackerStage1Kernel(CLLKTrackerStage1Kernel &&) = default; + /** Allow instances of this class to be moved */ + CLLKTrackerStage1Kernel &operator=(CLLKTrackerStage1Kernel &&) = default; + /** Initialise the kernel input and output + * + * @param[in] new_input Pointer to the input new tensor. Data types supported: U8 + * @param[in, out] new_points_internal Pointer to the array of CLLKInternalKeypoint for new points + * @param[in] coeff_table Pointer to the array holding the Spatial Gradient coefficients + * @param[in] old_ival Pointer to the array holding internal values + * @param[in] termination The criteria to terminate the search of each keypoint. + * @param[in] epsilon The error for terminating the algorithm + * @param[in] num_iterations The maximum number of iterations before terminating the algorithm + * @param[in] window_dimension The size of the window on which to perform the algorithm + * @param[in] level The pyramid level + */ + void configure(const ICLTensor *new_input, ICLLKInternalKeypointArray *new_points_internal, ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival, + Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, size_t level); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_new_input; +}; +} +#endif /*__ARM_COMPUTE_CLLKTRACKERKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h b/arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h new file mode 100644 index 0000000000..fda0327461 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLLOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H__
+#define __ARM_COMPUTE_CLLOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel to multiply each row of the first tensor with the low 2 dimensions of the second tensor.
+ *
+ * @attention The second input tensor must have at least 2 dimensions (matrix)
+ *
+ */
+class CLLocallyConnectedMatrixMultiplyKernel : public ICLKernel
+{
+public:
+    /** Default constructor */
+    CLLocallyConnectedMatrixMultiplyKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLLocallyConnectedMatrixMultiplyKernel(const CLLocallyConnectedMatrixMultiplyKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLLocallyConnectedMatrixMultiplyKernel &operator=(const CLLocallyConnectedMatrixMultiplyKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    CLLocallyConnectedMatrixMultiplyKernel(CLLocallyConnectedMatrixMultiplyKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    CLLocallyConnectedMatrixMultiplyKernel &operator=(CLLocallyConnectedMatrixMultiplyKernel &&) = default;
+    /** Initialise the kernel's inputs and output
+     *
+     * @param[in]  input0 First input tensor. Data types supported: F32
+     * @param[in]  input1 Second input tensor. Data type supported: same as @p input0
+     * @param[out] output Output tensor to store the result. Data type supported: same as @p input0
+     */
+    void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output);
+
+    // Inherited methods overridden:
+    void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+    const ICLTensor *_input0;
+    const ICLTensor *_input1;
+    ICLTensor       *_output;
+};
+}
+#endif /* __ARM_COMPUTE_CLLOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h b/arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h
new file mode 100644
index 0000000000..a8e1dcb361
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLMAGNITUDEPHASEKERNEL_H__
+#define __ARM_COMPUTE_CLMAGNITUDEPHASEKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the kernel to compute magnitude and phase.
+ *
+ */
+class CLMagnitudePhaseKernel : public ICLKernel
+{
+public:
+    /** Default constructor. */
+    CLMagnitudePhaseKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    CLMagnitudePhaseKernel(const CLMagnitudePhaseKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    CLMagnitudePhaseKernel &operator=(const CLMagnitudePhaseKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    CLMagnitudePhaseKernel(CLMagnitudePhaseKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    CLMagnitudePhaseKernel &operator=(CLMagnitudePhaseKernel &&) = default;
+    /** Initialise the kernel's inputs and outputs.
+     *
+     * @note At least one of magnitude or phase must be set.
+     *
+     * @param[in]  gx         The input gradient X tensor. Data types supported: S16.
+     * @param[in]  gy         The input gradient Y tensor. Data types supported: S16.
+     * @param[out] magnitude  (Optional) The output tensor - Magnitude. Data types supported: S16.
+     * @param[out] phase      (Optional) The output tensor - Phase. Data types supported: U8.
+     * @param[in]  mag_type   (Optional) Magnitude calculation type. Default: L2NORM.
+     * @param[in]  phase_type (Optional) Phase calculation type. Default: SIGNED.
+     */
+    void configure(const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase,
+                   MagnitudeType mag_type = MagnitudeType::L2NORM, PhaseType phase_type = PhaseType::SIGNED);
+
+    // Inherited methods overridden:
+    void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+    const ICLTensor *_gx;        /**< Input gradient X. */
+    const ICLTensor *_gy;        /**< Input gradient Y. */
+    ICLTensor       *_magnitude; /**< Output - Magnitude. */
+    ICLTensor       *_phase;     /**< Output - Phase. */
+    bool             _run_mag;   /**< Calculate magnitude? */
+    bool             _run_phase; /**< Calculate phase? */
+};
+}
+
+#endif /* __ARM_COMPUTE_CLMAGNITUDEPHASEKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLMeanStdDevKernel.h b/arm_compute/core/CL/kernels/CLMeanStdDevKernel.h
new file mode 100644
index 0000000000..9f30f76e1b
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLMeanStdDevKernel.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLMEANSTDDEVKERNEL_H__
+#define __ARM_COMPUTE_CLMEANSTDDEVKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace cl
+{
+class Buffer;
+}
+
+namespace arm_compute
+{
+class ICLTensor;
+using ICLImage = ICLTensor;
+
+/** Interface for the kernel to calculate mean and standard deviation of input image pixels. */
+class CLMeanStdDevKernel : public ICLKernel
+{
+public:
+    /** Default constructor */
+    CLMeanStdDevKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLMeanStdDevKernel(const CLMeanStdDevKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLMeanStdDevKernel &operator=(const CLMeanStdDevKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    CLMeanStdDevKernel(CLMeanStdDevKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    CLMeanStdDevKernel &operator=(CLMeanStdDevKernel &&) = default;
+    /** Initialise the kernel's input and outputs.
+     *
+     * @param[in]  input              Input image. Data types supported: U8.
+     * @param[out] mean               Output average pixel value.
+     * @param[out] global_sum         Keeps global sum of pixel values (Buffer size: 1 cl_ulong).
+     * @param[out] stddev             (Optional) Output standard deviation of pixel values.
+     * @param[out] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values (Buffer size: 1 cl_ulong).
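+     *
+     * Illustrative sketch (the image and the two single-cl_ulong buffers are
+     * assumptions, created and allocated by the caller):
+     * @code
+     * CLMeanStdDevKernel mean_stddev;
+     * float mean = 0.f, stddev = 0.f;
+     * mean_stddev.configure(&image, &mean, &global_sum, &stddev, &global_sum_squared);
+     * @endcode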
+ */ + void configure(const ICLImage *input, float *mean, cl::Buffer *global_sum, float *stddev = nullptr, cl::Buffer *global_sum_squared = nullptr); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLImage *_input; + float *_mean; + float *_stddev; + cl::Buffer *_global_sum; + cl::Buffer *_global_sum_squared; +}; +} +#endif /* __ARM_COMPUTE_CLMEANSTDDEVKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLMedian3x3Kernel.h b/arm_compute/core/CL/kernels/CLMedian3x3Kernel.h new file mode 100644 index 0000000000..5af364b6c6 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLMedian3x3Kernel.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLMEDIAN3X3KERNEL_H__ +#define __ARM_COMPUTE_CLMEDIAN3X3KERNEL_H__ + +#include "arm_compute/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the median 3x3 filter kernel. + * + */ +class CLMedian3x3Kernel : public ICLSimple2DKernel +{ +public: + /** Initialise the kernel's input and output. + * + * @param[in] input An input tensor. Data types supported: U8 + * @param[out] output The output tensor. Data types supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); + + // Inherited methods overridden: + BorderSize border_size() const override; +}; +} +#endif /*__ARM_COMPUTE_CLMEDIAN3X3KERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h b/arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h new file mode 100644 index 0000000000..6a31f3cf18 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLMINMAXLOCATIONKERNEL_H__
+#define __ARM_COMPUTE_CLMINMAXLOCATIONKERNEL_H__
+
+#include "arm_compute/core/CL/ICLArray.h"
+#include "arm_compute/core/CL/ICLKernel.h"
+
+#include <array>
+
+namespace arm_compute
+{
+class ICLTensor;
+using ICLImage = ICLTensor;
+
+/** Interface for the kernel to perform min max search on an image.
+ */
+class CLMinMaxKernel : public ICLKernel
+{
+public:
+    /** Default constructor */
+    CLMinMaxKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLMinMaxKernel(const CLMinMaxKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLMinMaxKernel &operator=(const CLMinMaxKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    CLMinMaxKernel(CLMinMaxKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    CLMinMaxKernel &operator=(CLMinMaxKernel &&) = default;
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  input   Input Image. Data types supported: U8 or S16.
+     * @param[out] min_max Buffer of 2 elements to store the min value at position 0 and the max value at position 1. Data type supported: S32.
+     */
+    void configure(const ICLImage *input, cl::Buffer *min_max);
+
+    // Inherited methods overridden:
+    void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+    const ICLTensor   *_input;             /**< Input image. */
+    cl::Buffer        *_min_max;           /**< Minimum/maximum value. */
+    std::array<int, 2> _data_type_max_min; /**< Maximum and minimum data type value respectively. */
+};
+
+/** Interface for the kernel to find min max locations of an image.
+ */
+class CLMinMaxLocationKernel : public ICLKernel
+{
+public:
+    /** Constructor */
+    CLMinMaxLocationKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLMinMaxLocationKernel(const CLMinMaxLocationKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLMinMaxLocationKernel &operator=(const CLMinMaxLocationKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    CLMinMaxLocationKernel(CLMinMaxLocationKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    CLMinMaxLocationKernel &operator=(CLMinMaxLocationKernel &&) = default;
+    /** Initialise the kernel's input and outputs.
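+     *
+     * Illustrative sketch of the two-kernel sequence (the buffers and coordinate
+     * arrays are assumptions; min_max_buf must already hold the result of
+     * @ref CLMinMaxKernel when this kernel runs):
+     * @code
+     * CLMinMaxKernel         min_max;
+     * CLMinMaxLocationKernel min_max_loc;
+     * min_max.configure(&image, &min_max_buf);
+     * min_max_loc.configure(&image, &min_max_buf, &min_max_count_buf, &min_coords, &max_coords);
+     * @endcode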
+     *
+     * @note When locations of min and max occurrences are requested, the reported number of locations is limited to the given array size.
+     *
+     * @param[in]  input         Input image. Data types supported: U8 or S16.
+     * @param[in]  min_max       Buffer of 2 elements which contains the min value at position 0 and the max value at position 1. Data type supported: S32
+     * @param[out] min_max_count Buffer of 2 elements to store the min value occurrences at position 0 and the max value occurrences at position 1. Data type supported: S32
+     * @param[out] min_loc       (Optional) Array of Coordinates2D used to store minimum value locations.
+     * @param[out] max_loc       (Optional) Array of Coordinates2D used to store maximum value locations.
+     */
+    void configure(const ICLImage *input, cl::Buffer *min_max, cl::Buffer *min_max_count,
+                   ICLCoordinates2DArray *min_loc = nullptr, ICLCoordinates2DArray *max_loc = nullptr);
+
+    // Inherited methods overridden:
+    void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+    const ICLImage *_input;         /**< Input image. */
+    cl::Buffer     *_min_max_count; /**< Minimum/maximum value occurrences. */
+};
+}
+#endif /*__ARM_COMPUTE_CLMINMAXLOCATIONKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h b/arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h
new file mode 100644
index 0000000000..0c59063bbc
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLNONLINEARFILTERKERNEL_H__
+#define __ARM_COMPUTE_CLNONLINEARFILTERKERNEL_H__
+
+#include "arm_compute/core/CL/ICLSimple2DKernel.h"
+#include "arm_compute/core/Types.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the kernel to apply a non-linear filter */
+class CLNonLinearFilterKernel : public ICLSimple2DKernel
+{
+public:
+    /** Default constructor */
+    CLNonLinearFilterKernel();
+    /** Set the source, destination and border mode of the kernel
+     *
+     * @param[in]  input     Source tensor. Data types supported: U8
+     * @param[out] output    Destination tensor. Data types supported: U8
+     * @param[in]  function  Non linear function to perform
+     * @param[in]  mask_size Mask size. Supported sizes: 3, 5
+     * @param[in]  pattern   Mask pattern
+     * @param[in]  mask      The given mask.
Will be used only if pattern is specified to PATTERN_OTHER + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function, + unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, + bool border_undefined); + + // Inherited methods overridden: + BorderSize border_size() const override; + +private: + BorderSize _border_size; /**< Border size */ +}; +} +#endif /*__ARM_COMPUTE_CLNONLINEARFILTERKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h b/arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h new file mode 100644 index 0000000000..1719bbbb47 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLNONMAXIMASUPPRESSION3x3KERNEL_H__ +#define __ARM_COMPUTE_CLNONMAXIMASUPPRESSION3x3KERNEL_H__ + +#include "arm_compute/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface to perform Non-Maxima suppression over a 3x3 window using OpenCL + * + * @note Used by @ref CLFastCorners and @ref CLHarrisCorners + */ +class CLNonMaximaSuppression3x3Kernel : public ICLSimple2DKernel +{ +public: + /** Initialise the kernel's sources, destinations and border mode. + * + * @param[in] input Source tensor. Data types supported: U8, F32. (Must be the same as the output tensor) + * @param[out] output Destination tensor. Data types supported: U8, F32. (Must be the same as the input tensor) + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); + + // Inherited methods overridden: + BorderSize border_size() const override; +}; +} +#endif /* __ARM_COMPUTE_CLNONMAXIMASUPPRESSION3x3KERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h b/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h new file mode 100644 index 0000000000..ca9034b162 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLNORMALIZATIONLAYERKERNEL_H__
+#define __ARM_COMPUTE_CLNORMALIZATIONLAYERKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the normalization layer kernel.
+ */
+class CLNormalizationLayerKernel : public ICLKernel
+{
+public:
+ /** Constructor */
+ CLNormalizationLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLNormalizationLayerKernel(const CLNormalizationLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLNormalizationLayerKernel &operator=(const CLNormalizationLayerKernel &) = delete;
+ /** Default move constructor. */
+ CLNormalizationLayerKernel(CLNormalizationLayerKernel &&) = default;
+ /** Default move assignment operator. */
+ CLNormalizationLayerKernel &operator=(CLNormalizationLayerKernel &&) = default;
+
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
+ * and an optional 4th dimension for batch of inputs. Data types supported: F16, F32.
+ * @param[in] squared_input Source tensor in which each element has been squared. 3 lower dims represent a single input with dimensions [width, height, IFM].
+ * Data types should match the input type.
+ * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data types should match the input type.
+ * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters.
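+ *
+ * @note Illustrative usage sketch, not part of the API contract: it assumes `in`, `in_sq` and `out` are
+ * pre-allocated CLTensor objects of matching shape and type, and that `in_sq` already holds the
+ * element-wise square of `in`.
+ * @code
+ * CLNormalizationLayerKernel norm_kernel;
+ * norm_kernel.configure(&in, &in_sq, &out, NormalizationLayerInfo(NormType::CROSS_MAP, 5));
+ * @endcode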
+ */ + void configure(const ICLTensor *input, const ICLTensor *squared_input, ICLTensor *output, NormalizationLayerInfo norm_info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_input; + const ICLTensor *_squared_input; + ICLTensor *_output; + BorderSize _border_size; +}; +} +#endif /*__ARM_COMPUTE_CLNORMALIZATIONLAYERKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h b/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h new file mode 100644 index 0000000000..6fbbe95219 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLPIXELWISEMULTIPLICATIONKERNEL_H__ +#define __ARM_COMPUTE_CLPIXELWISEMULTIPLICATIONKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the pixelwise multiplication kernel. + * + */ +class CLPixelWiseMultiplicationKernel : public ICLKernel +{ +public: + /** Default constructor.*/ + CLPixelWiseMultiplicationKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLPixelWiseMultiplicationKernel(const CLPixelWiseMultiplicationKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLPixelWiseMultiplicationKernel &operator=(const CLPixelWiseMultiplicationKernel &) = delete; + /** Allow instances of this class to be moved */ + CLPixelWiseMultiplicationKernel(CLPixelWiseMultiplicationKernel &&) = default; + /** Allow instances of this class to be moved */ + CLPixelWiseMultiplicationKernel &operator=(CLPixelWiseMultiplicationKernel &&) = default; + /** Initialise the kernel's input, output and border mode. + * + * @param[in] input1 An input tensor. Data types supported: U8, S16, F16, F32. + * @param[in] input2 An input tensor. Data types supported: U8, S16, F16, F32. + * @param[out] output The output tensor, Data types supported: U8 (Only if both inputs are U8), S16, F16, F32. + * @param[in] scale Scale to apply after multiplication. + * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. 
+ * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate + * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even. + */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float scale, + ConvertPolicy overflow_policy, RoundingPolicy rounding_policy); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input1; + const ICLTensor *_input2; + ICLTensor *_output; +}; +} + +#endif /*__ARM_COMPUTE_CLPIXELWISEMULTIPLICATIONKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h b/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h new file mode 100644 index 0000000000..546a40b15e --- /dev/null +++ b/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLPOOLINGLAYERKERNEL_H__ +#define __ARM_COMPUTE_CLPOOLINGLAYERKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the pooling layer kernel */ +class CLPoolingLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLPoolingLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLPoolingLayerKernel(const CLPoolingLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLPoolingLayerKernel &operator=(const CLPoolingLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + CLPoolingLayerKernel(CLPoolingLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + CLPoolingLayerKernel &operator=(CLPoolingLayerKernel &&) = default; + /** Default destructor */ + ~CLPoolingLayerKernel() = default; + + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: F16, F32. + * @param[out] output Destination tensor. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. 
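+ *
+ * @note Illustrative usage sketch (assumes `src` and `dst` are pre-allocated CLTensor objects and that
+ * the kernel is enqueued through the runtime's CLScheduler):
+ * @code
+ * CLPoolingLayerKernel pool;
+ * pool.configure(&src, &dst, PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0)));
+ * CLScheduler::get().enqueue(pool); // 2x2 max pooling, stride 2, no padding
+ * @endcode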
+ */ + void configure(const ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + PoolingLayerInfo _pool_info; + BorderSize _border_size; +}; +} +#endif /*__ARM_COMPUTE_CLPOOLINGLAYERKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLRemapKernel.h b/arm_compute/core/CL/kernels/CLRemapKernel.h new file mode 100644 index 0000000000..7cebf2e817 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLRemapKernel.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLREMAPKERNEL_H__ +#define __ARM_COMPUTE_CLREMAPKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to perform a remap on a tensor */ +class CLRemapKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLRemapKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLRemapKernel(const CLRemapKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLRemapKernel &operator=(const CLRemapKernel &) = delete; + /** Allow instances of this class to be moved */ + CLRemapKernel(CLRemapKernel &&) = default; + /** Allow instances of this class to be moved */ + CLRemapKernel &operator=(CLRemapKernel &&) = default; + /** Initialize the kernel's input, output and border mode. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[in] map_x Map for X coordinates. Data types supported: F32. + * @param[in] map_y Map for Y coordinates. Data types supported: F32. + * @param[out] output Destination tensor. Data types supported: U8. All but the lowest two dimensions must be the same size as in the input tensor, i.e. remapping is only performed within the XY-plane. + * @param[in] policy The interpolation type. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
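+ *
+ * @note Illustrative usage sketch: `src` and `dst` are assumed to be pre-allocated U8 CLTensors, and
+ * `map_x`, `map_y` pre-filled F32 CLTensors of the output shape.
+ * @code
+ * CLRemapKernel remap;
+ * remap.configure(&src, &map_x, &map_y, &dst, InterpolationPolicy::NEAREST_NEIGHBOR, false);
+ * @endcode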
+ */
+ void configure(const ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, InterpolationPolicy policy, bool border_undefined);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+ BorderSize border_size() const override;
+
+private:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+ const ICLTensor *_map_x;
+ const ICLTensor *_map_y;
+};
+}
+#endif /*__ARM_COMPUTE_CLREMAPKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLScaleKernel.h b/arm_compute/core/CL/kernels/CLScaleKernel.h
new file mode 100644
index 0000000000..e74a7cb82a
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLScaleKernel.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLSCALEKERNEL_H__
+#define __ARM_COMPUTE_CLSCALEKERNEL_H__
+
+#include "arm_compute/core/CL/ICLSimple2DKernel.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the scale kernel. */
+class CLScaleKernel : public ICLSimple2DKernel
+{
+public:
+ /** Initialise the kernel's inputs, output and interpolation policy
+ *
+ * @note dx, dy and offsets have the same dimensions (width and height) as the output tensor
+ *
+ * @param[in] input Source tensor. Data types supported: U8, S16.
+ * @param[out] output Destination tensor. Data types supported: U8, S16 (Must be the same as the input tensor).
+ * All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
+ * @param[in] policy Interpolation type to use
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, InterpolationPolicy policy, bool border_undefined);
+
+ // Inherited methods overridden:
+ BorderSize border_size() const override;
+};
+}
+
+#endif /*__ARM_COMPUTE_CLSCALEKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLScharr3x3Kernel.h b/arm_compute/core/CL/kernels/CLScharr3x3Kernel.h
new file mode 100644
index 0000000000..fe245cc351
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLScharr3x3Kernel.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLSCHARR3X3KERNEL_H__ +#define __ARM_COMPUTE_CLSCHARR3X3KERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the kernel to run a 3x3 Scharr filter on a tensor. + * + * @f[ + * \mathbf{G}_x=\begin{vmatrix} + * -3 & 0 & +3\\ + * -10& 0 & +10\\ + * -3 & 0 & +3 + * \end{vmatrix} + * @f] + * @f[ + * \mathbf{G}_y=\begin{vmatrix} + * -3 & -10 & -3\\ + * 0 & 0 & 0\\ + * +3 & +10 & +3 + * \end{vmatrix} + * @f] + */ +class CLScharr3x3Kernel : public ICLKernel +{ +public: + /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */ + CLScharr3x3Kernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLScharr3x3Kernel(const CLScharr3x3Kernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLScharr3x3Kernel &operator=(const CLScharr3x3Kernel &) = delete; + /** Allow instances of this class to be moved */ + CLScharr3x3Kernel(CLScharr3x3Kernel &&) = default; + /** Allow instances of this class to be moved */ + CLScharr3x3Kernel &operator=(CLScharr3x3Kernel &&) = default; + /** Initialise the kernel's source, destination and border. + * + * @note At least one of output_x or output_y must be set. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. + * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + bool _run_scharr_x; /**< Do we need to run Scharr X ? */ + bool _run_scharr_y; /**< Do we need to run Scharr Y ? 
*/ + const ICLTensor *_input; /**< Input image */ + ICLTensor *_output_x; /**< Output image for scharr X */ + ICLTensor *_output_y; /**< Output image for scharr Y */ +}; +} +#endif /*__ARM_COMPUTE_CLSCHARR3X3KERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLSobel3x3Kernel.h b/arm_compute/core/CL/kernels/CLSobel3x3Kernel.h new file mode 100644 index 0000000000..9edeb6ceff --- /dev/null +++ b/arm_compute/core/CL/kernels/CLSobel3x3Kernel.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLSOBEL3X3KERNEL_H__ +#define __ARM_COMPUTE_CLSOBEL3X3KERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the kernel to run a 3x3 Sobel filter on a tensor. */ +class CLSobel3x3Kernel : public ICLKernel +{ +public: + /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */ + CLSobel3x3Kernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLSobel3x3Kernel(const CLSobel3x3Kernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLSobel3x3Kernel &operator=(const CLSobel3x3Kernel &) = delete; + /** Allow instances of this class to be moved */ + CLSobel3x3Kernel(CLSobel3x3Kernel &&) = default; + /** Allow instances of this class to be moved */ + CLSobel3x3Kernel &operator=(CLSobel3x3Kernel &&) = default; + /** Default destructor */ + ~CLSobel3x3Kernel() = default; + /** Initialise the kernel's source, destination and border. + * + * @note At least one of output_x or output_y must be set. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. + * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
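+ *
+ * @note Illustrative usage sketch (assumes `src` is a U8 CLTensor and `gx`, `gy` are S16 CLTensors of
+ * the same shape; pass nullptr for a gradient that is not needed):
+ * @code
+ * CLSobel3x3Kernel sobel;
+ * sobel.configure(&src, &gx, &gy, false); // compute both gradients, border mode not undefined
+ * @endcode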
+ */ + void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_input; /**< Input tensor */ + ICLTensor *_output_x; /**< Output tensor for Sobel X */ + ICLTensor *_output_y; /**< Output tensor for Sobel Y */ + bool _run_sobel_x; /**< Do we need to run Sobel X ? */ + bool _run_sobel_y; /**< Do we need to run Sobel Y ? */ +}; +} +#endif /*__ARM_COMPUTE_CLSOBEL3X3KERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLSobel5x5Kernel.h b/arm_compute/core/CL/kernels/CLSobel5x5Kernel.h new file mode 100644 index 0000000000..e90f8f587e --- /dev/null +++ b/arm_compute/core/CL/kernels/CLSobel5x5Kernel.h @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLSOBEL5X5KERNEL_H__ +#define __ARM_COMPUTE_CLSOBEL5X5KERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the kernel to run the horizontal pass of 5x5 Sobel filter on a tensor. */ +class CLSobel5x5HorKernel : public ICLKernel +{ +public: + /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */ + CLSobel5x5HorKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLSobel5x5HorKernel(const CLSobel5x5HorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLSobel5x5HorKernel &operator=(const CLSobel5x5HorKernel &) = delete; + /** Allow instances of this class to be moved */ + CLSobel5x5HorKernel(CLSobel5x5HorKernel &&) = default; + /** Allow instances of this class to be moved */ + CLSobel5x5HorKernel &operator=(CLSobel5x5HorKernel &&) = default; + /** Default destructor */ + ~CLSobel5x5HorKernel() = default; + + /** Initialise the kernel's source, destination and border. + * + * @note At least one of output_x or output_y must be set. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. + * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. 
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_input; /**< Input tensor */ + ICLTensor *_output_x; /**< X output of horizontal pass */ + ICLTensor *_output_y; /**< Y output of horizontal pass */ + bool _run_sobel_x; /**< Do we need to run Sobel X ? */ + bool _run_sobel_y; /**< Do we need to run Sobel Y ? */ + BorderSize _border_size; /**< Border size */ +}; + +/** Interface for the kernel to run the vertical pass of 5x5 Sobel filter on a tensor. */ +class CLSobel5x5VertKernel : public ICLKernel +{ +public: + /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */ + CLSobel5x5VertKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLSobel5x5VertKernel(const CLSobel5x5VertKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLSobel5x5VertKernel &operator=(const CLSobel5x5VertKernel &) = delete; + /** Allow instances of this class to be moved */ + CLSobel5x5VertKernel(CLSobel5x5VertKernel &&) = default; + /** Allow instances of this class to be moved */ + CLSobel5x5VertKernel &operator=(CLSobel5x5VertKernel &&) = default; + /** Default destructor */ + ~CLSobel5x5VertKernel() = default; + + /** Initialise the kernel's source, destination and border. + * + * @note At least one of output_x or output_y must be set and the corresponding input. + * + * @param[in] input_x (Optional) Input for X (X output of horizontal pass). Data types supported: S16. + * @param[in] input_y (Optional) Input for Y (Y output of horizontal pass). Data types supported: S16. + * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. + * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_input_x; /**< X input (X output of the horizontal pass) */ + const ICLTensor *_input_y; /**< Y input (Y output of the horizontal pass) */ + ICLTensor *_output_x; /**< X output of sobel */ + ICLTensor *_output_y; /**< Y output of sobel */ + bool _run_sobel_x; /**< Do we need to run sobel X? */ + bool _run_sobel_y; /**< Do we need to run sobel Y? */ +}; +} +#endif /*__ARM_COMPUTE_CLSOBEL5X5KERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLSobel7x7Kernel.h b/arm_compute/core/CL/kernels/CLSobel7x7Kernel.h new file mode 100644 index 0000000000..e5ef8444ee --- /dev/null +++ b/arm_compute/core/CL/kernels/CLSobel7x7Kernel.h @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLSOBEL7X7KERNEL_H__ +#define __ARM_COMPUTE_CLSOBEL7X7KERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the kernel to run the horizontal pass of 7x7 Sobel filter on a tensor. */ +class CLSobel7x7HorKernel : public ICLKernel +{ +public: + /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */ + CLSobel7x7HorKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLSobel7x7HorKernel(const CLSobel7x7HorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLSobel7x7HorKernel &operator=(const CLSobel7x7HorKernel &) = delete; + /** Allow instances of this class to be moved */ + CLSobel7x7HorKernel(CLSobel7x7HorKernel &&) = default; + /** Allow instances of this class to be moved */ + CLSobel7x7HorKernel &operator=(CLSobel7x7HorKernel &&) = default; + /** Default destructor */ + ~CLSobel7x7HorKernel() = default; + + /** Initialise the kernel's source, destination and border. + * + * @note At least one of output_x or output_y must be set. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S32. + * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S32. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_input; /**< Input tensor */ + ICLTensor *_output_x; /**< X output of horizontal pass */ + ICLTensor *_output_y; /**< Y output of horizontal pass */ + bool _run_sobel_x; /**< Do we need to run Sobel X ? */ + bool _run_sobel_y; /**< Do we need to run Sobel Y ? */ + BorderSize _border_size; /**< Border size */ +}; + +/** Interface for the kernel to run the vertical pass of 7x7 Sobel filter on a tensor. */ +class CLSobel7x7VertKernel : public ICLKernel +{ +public: + /** Default constructor: initialize all the pointers to nullptr and parameters to zero. 
*/ + CLSobel7x7VertKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLSobel7x7VertKernel(const CLSobel7x7VertKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLSobel7x7VertKernel &operator=(const CLSobel7x7VertKernel &) = delete; + /** Allow instances of this class to be moved */ + CLSobel7x7VertKernel(CLSobel7x7VertKernel &&) = default; + /** Allow instances of this class to be moved */ + CLSobel7x7VertKernel &operator=(CLSobel7x7VertKernel &&) = default; + /** Default destructor */ + ~CLSobel7x7VertKernel() = default; + + /** Initialise the kernel's source, destination and border. + * + * @note At least one of output_x or output_y must be set and the corresponding input. + * + * @param[in] input_x (Optional) Input for X (X output of horizontal pass). Data types supported: S32. + * @param[in] input_y (Optional) Input for Y (Y output of horizontal pass). Data types supported: S32. + * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S32. + * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S32. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_input_x; /**< X input (X output of the horizontal pass) */ + const ICLTensor *_input_y; /**< Y input (Y output of the horizontal pass) */ + ICLTensor *_output_x; /**< X output of sobel */ + ICLTensor *_output_y; /**< Y output of sobel */ + bool _run_sobel_x; /**< Do we need to run sobel X? */ + bool _run_sobel_y; /**< Do we need to run sobel Y? */ +}; +} +#endif /*__ARM_COMPUTE_CLSOBEL7X7KERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h b/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h new file mode 100644 index 0000000000..0806974ad6 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+#ifndef __ARM_COMPUTE_CLSOFTMAXLAYERKERNEL_H__
+#define __ARM_COMPUTE_CLSOFTMAXLAYERKERNEL_H__
+
+#include "arm_compute/core/CL/ICLSimple2DKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for identifying the max value of 1D Logits */
+class CLLogits1DMaxKernel : public ICLSimple2DKernel
+{
+public:
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Source tensor. Data types supported: F16, F32. Number of channels must be 1.
+ * @param[out] output Destination tensor. Matching input type and channel number.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output);
+};
+
+/** Interface for shifting the logits values around the max value and exponentiating the result */
+class CLLogits1DShiftExpSumKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLLogits1DShiftExpSumKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLLogits1DShiftExpSumKernel(const CLLogits1DShiftExpSumKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLLogits1DShiftExpSumKernel &operator=(const CLLogits1DShiftExpSumKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLLogits1DShiftExpSumKernel(CLLogits1DShiftExpSumKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLLogits1DShiftExpSumKernel &operator=(CLLogits1DShiftExpSumKernel &&) = default;
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Source tensor. Data types supported: F16, F32. Number of channels must be 1.
+ * @param[in] max Max values tensor. Matching input type and channel number.
+ * @param[out] output Destination tensor. Matching input type and channel number.
+ * @param[out] sum Sum of 1D logits tensor. Matching input type and channel number.
+ */
+ void configure(const ICLTensor *input, const ICLTensor *max, ICLTensor *output, ICLTensor *sum);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ const ICLTensor *_max;
+ ICLTensor *_output;
+ ICLTensor *_sum;
+};
+
+/** Interface for calculating the final step of the Softmax Layer where each logit value is multiplied by the inverse of the sum of the logits. */
+class CLLogits1DNormKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLLogits1DNormKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLLogits1DNormKernel(const CLLogits1DNormKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLLogits1DNormKernel &operator=(const CLLogits1DNormKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLLogits1DNormKernel(CLLogits1DNormKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLLogits1DNormKernel &operator=(CLLogits1DNormKernel &&) = default;
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Source tensor. Data types supported: F16, F32. Number of channels must be 1.
+ * @param[in] sum Sum tensor. Dimensions should be dim(input)-1. Matching input type and channel number.
+ * @param[out] output Destination tensor. Matching input type and channel number.
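+ *
+ * @note Illustrative sketch of the full three-stage softmax chain (all CLTensors are assumed to be
+ * pre-allocated with matching types; `max_vals`, `tmp` and `sum` are intermediate tensors):
+ * @code
+ * CLLogits1DMaxKernel max_kernel;
+ * CLLogits1DShiftExpSumKernel shift_exp_sum_kernel;
+ * CLLogits1DNormKernel norm_kernel;
+ * max_kernel.configure(&in, &max_vals);
+ * shift_exp_sum_kernel.configure(&in, &max_vals, &tmp, &sum);
+ * norm_kernel.configure(&tmp, &sum, &out);
+ * @endcode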
+ */ + void configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + const ICLTensor *_sum; + ICLTensor *_output; +}; +} +#endif /*__ARM_COMPUTE_CLSOFTMAXLAYERKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLTableLookupKernel.h b/arm_compute/core/CL/kernels/CLTableLookupKernel.h new file mode 100644 index 0000000000..477f58dc38 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLTableLookupKernel.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLTABLELOOKUPKERNEL_H__ +#define __ARM_COMPUTE_CLTABLELOOKUPKERNEL_H__ + +#include "arm_compute/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; +class ICLLut; + +/** Interface for the kernel to perform table lookup calculations. */ +class CLTableLookupKernel : public ICLSimple2DKernel +{ +public: + /** Initialise the kernel's input, lut and output. + * + * @param[in] input An input tensor. Data types supported: U8, S16. + * @param[in] lut The input LUT. Data types supported: U8, S16. + * @param[out] output The output tensor. Data types supported: U8, S16. + */ + void configure(const ICLTensor *input, const ICLLut *lut, ICLTensor *output); +}; +} +#endif /* __ARM_COMPUTE_CLTABLELOOKUPKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLThresholdKernel.h b/arm_compute/core/CL/kernels/CLThresholdKernel.h new file mode 100644 index 0000000000..d7a6ae2cdb --- /dev/null +++ b/arm_compute/core/CL/kernels/CLThresholdKernel.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLTHRESHOLDKERNEL_H__
+#define __ARM_COMPUTE_CLTHRESHOLDKERNEL_H__
+
+#include "arm_compute/core/CL/ICLSimple2DKernel.h"
+#include "arm_compute/core/Types.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the thresholding kernel.
+ *
+ */
+class CLThresholdKernel : public ICLSimple2DKernel
+{
+public:
+ /** Initialise the kernel's input, output and threshold parameters.
+ *
+ * @param[in] input An input tensor. Data types supported: U8
+ * @param[out] output The output tensor. Data types supported: U8.
+ * @param[in] threshold Threshold. When the threshold type is RANGE, this is used as the lower threshold.
+ * @param[in] false_value Value to set when the condition is not respected.
+ * @param[in] true_value Value to set when the condition is respected.
+ * @param[in] type Thresholding type. Either RANGE or BINARY.
+ * @param[in] upper Upper threshold. Only used when the thresholding type is RANGE.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, uint8_t threshold,
+ uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper);
+};
+}
+#endif /*__ARM_COMPUTE_CLTHRESHOLDKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLTransposeKernel.h b/arm_compute/core/CL/kernels/CLTransposeKernel.h
new file mode 100644
index 0000000000..9ad183f8f1
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLTransposeKernel.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLTRANSPOSEKERNEL_H__
+#define __ARM_COMPUTE_CLTRANSPOSEKERNEL_H__
+
+#include "arm_compute/core/CL/ICLSimple2DKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel which transposes the elements of a matrix.
+ *
+ * [width, height, batch] -> [height, width, batch]
+ *
+ */
+class CLTransposeKernel : public ICLSimple2DKernel
+{
+public:
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] input Input tensor. Data types supported: U8/S8/U16/S16/F16/U32/S32/F32
+ * @param[out] output Output tensor. Data type supported: Same as @p input
+ */
+ void configure(const ICLTensor *input, ICLTensor *output);
+};
+}
+#endif /* __ARM_COMPUTE_CLTRANSPOSEKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLWarpAffineKernel.h b/arm_compute/core/CL/kernels/CLWarpAffineKernel.h
new file mode 100644
index 0000000000..05d6d0a8f7
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLWarpAffineKernel.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLWARPAFFINEKERNEL_H__
+#define __ARM_COMPUTE_CLWARPAFFINEKERNEL_H__
+
+#include "arm_compute/core/CL/ICLSimple2DKernel.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the warp affine kernel.*/
+class CLWarpAffineKernel : public ICLSimple2DKernel
+{
+public:
+ /** Initialize the function's source, destination, interpolation policy and border_mode.
+ *
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output Destination tensor, Data types supported: U8.
+ * @param[in] matrix The affine matrix. Must be 2x3 of type float.
+ * @param[in] policy The interpolation type.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const float *matrix, InterpolationPolicy policy);
+
+ // Inherited methods overridden:
+ BorderSize border_size() const override;
+};
+}
+#endif /*__ARM_COMPUTE_CLWARPAFFINEKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h b/arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h
new file mode 100644
index 0000000000..5c5013c599
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLWARPERSPECTIVEKERNEL_H__ +#define __ARM_COMPUTE_CLWARPERSPECTIVEKERNEL_H__ + +#include "arm_compute/core/CL/ICLSimple2DKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ICLTensor; +/** Interface for the warp perspective kernel.*/ +class CLWarpPerspectiveKernel : public ICLSimple2DKernel +{ +public: + /** Initialize the function's source, destination, interpolation policy and border_mode. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor, Data types supported: U8. + * @param[in] matrix The perspective matrix. Must be 3x3 of type float. + * @param[in] policy The interpolation type. + */ + void configure(const ICLTensor *input, ICLTensor *output, const float *matrix, InterpolationPolicy policy); + + // Inherited methods overridden: + BorderSize border_size() const override; +}; +} + +#endif /*__ARM_COMPUTE_CLWARPERSPECTIVEKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h b/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h new file mode 100644 index 0000000000..1dc8a8b80e --- /dev/null +++ b/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+#ifndef __ARM_COMPUTE_CLWEIGHTSRESHAPEKERNEL_H__
+#define __ARM_COMPUTE_CLWEIGHTSRESHAPEKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class CLWeightsReshapeKernel : public ICLKernel
+{
+public:
+ /** Constructor.
+ *
+ * @param[in] is_shared Flag to indicate whether the weights are shared or not.
+ */
+ CLWeightsReshapeKernel(bool is_shared = false);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLWeightsReshapeKernel(const CLWeightsReshapeKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLWeightsReshapeKernel &operator=(const CLWeightsReshapeKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLWeightsReshapeKernel(CLWeightsReshapeKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLWeightsReshapeKernel &operator=(CLWeightsReshapeKernel &&) = default;
+ /** Default destructor */
+ ~CLWeightsReshapeKernel() = default;
+
+ /** Set the input and output of the kernel.
+ *
+ * @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared,
+ * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. Data types supported: F16, F32
+ * @param[in] biases The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with
+ * dimensions [OFM, num_patches] if unshared. Data types supported: Same as @p input
+ * @param[out] output The output tensor. Should be a 2D tensor. Data types supported: Same as @p input
+ */
+ void configure(const ICLTensor *input, const ICLTensor *biases, ICLTensor *output);
+
+ // Inherited methods overridden:
+ virtual void run(const Window &window, cl::CommandQueue &queue) = 0;
+
+protected:
+ bool _is_shared;
+ const ICLTensor *_input;
+ const ICLTensor *_biases;
+ ICLTensor *_output;
+};
+
+/** Interface for the weights reshape kernel used by convolution and fully connected layers.
+ *
+ * Rearranges each 3-dimensional kernel to a single row leading to a matrix with linearized kernels.
+ * In combination with the @ref CLIm2ColKernel, it can transform a convolution into a matrix multiplication.
+ *
+ * For example assuming a 3D weight kernel of 3x3 dimensions and depth of 2 we have:
+ * @f[
+ * \left( \begin{array}{ccc}
+ * a000 & a001 & a002 \\
+ * a010 & a011 & a012 \\
+ * a020 & a021 & a022 \\
+ * \end{array} \right)
+ * \left( \begin{array}{ccc}
+ * a100 & a101 & a102 \\
+ * a110 & a111 & a112 \\
+ * a120 & a121 & a122 \\
+ * \end{array} \right)
+ * \rightarrow
+ * \left( \begin{array}{cccccccccccccccccc}
+ * a000 & a001 & a002 & a010 & a011 & a012 & a020 & a021 & a022 & a100 & a101 & a102 & a110 & a111 & a112 & a120 & a121 & a122 \\
+ * \end{array} \right)
+ * @f]
+ */
+class CLConvolutionLayerWeightsReshapeKernel : public CLWeightsReshapeKernel
+{
+public:
+ /** Default constructor */
+ CLConvolutionLayerWeightsReshapeKernel();
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+};
+
+/** Interface for the weights reshape kernel used by locally connected layers.
*/ +class CLLocallyConnectedLayerWeightsReshapeKernel : public CLWeightsReshapeKernel +{ +public: + /** Default constructor */ + CLLocallyConnectedLayerWeightsReshapeKernel(); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; +}; +} +#endif /*__ARM_COMPUTE_CLWEIGHTSRESHAPEKERNEL_H__ */ diff --git a/arm_compute/core/CPP/CPPKernels.h b/arm_compute/core/CPP/CPPKernels.h new file mode 100644 index 0000000000..1eabfa9437 --- /dev/null +++ b/arm_compute/core/CPP/CPPKernels.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CPPKERNELS_H__ +#define __ARM_COMPUTE_CPPKERNELS_H__ + +/* Header regrouping all the CPP kernels */ +#include "arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h" +#include "arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h" +#include "arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h" + +#endif /* __ARM_COMPUTE_CPPKERNELS_H__ */ diff --git a/arm_compute/core/CPP/ICPPKernel.h b/arm_compute/core/CPP/ICPPKernel.h new file mode 100644 index 0000000000..99ae68f2e5 --- /dev/null +++ b/arm_compute/core/CPP/ICPPKernel.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_ICPPKERNEL_H__ +#define __ARM_COMPUTE_ICPPKERNEL_H__ + +#include "arm_compute/core/IKernel.h" + +namespace arm_compute +{ +class Window; + +/** Common interface for all kernels implemented in C++ */ +class ICPPKernel : public IKernel +{ +public: + /** Default destructor */ + virtual ~ICPPKernel() = default; + + /** Execute the kernel on the passed window + * + * @warning If is_parallelisable() returns false then the passed window must be equal to window() + * + * @note The window has to be a region within the window returned by the window() method + * + * @note The width of the window has to be a multiple of num_elems_processed_per_iteration(). + * + * @param[in] window Region on which to execute the kernel. (Must be a region of the window returned by window()) + */ + virtual void run(const Window &window) = 0; +}; +} +#endif /*__ARM_COMPUTE_ICPPKERNEL_H__ */
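A minimal sketch of what implementing this interface looks like. The kernel below is hypothetical (not part of the patch), and it assumes IKernel exposes the usual protected configure(const Window &) plus the Iterator/execute_window_loop helpers declared later in this patch; run() only touches the region described by the window it receives, which is what lets a scheduler split the iteration space across threads.

#include "arm_compute/core/CPP/ICPPKernel.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/Window.h"

#include <cstdint>

namespace arm_compute
{
// Trivial example kernel: fill a tensor with a constant byte value
class CPPFillKernel : public ICPPKernel
{
public:
    void configure(ITensor *output, uint8_t value)
    {
        _output = output;
        _value  = value;
        // Process the whole tensor, one element per iteration
        ICPPKernel::configure(calculate_max_window(*output->info()));
    }
    void run(const Window &window) override
    {
        Iterator out(_output, window);
        execute_window_loop(window, [&](const Coordinates &)
        {
            *out.ptr() = _value; // write the current element only
        },
        out);
    }

private:
    ITensor *_output{ nullptr };
    uint8_t  _value{ 0 };
};
} // namespace arm_compute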
diff --git a/arm_compute/core/CPP/ICPPSimpleKernel.h b/arm_compute/core/CPP/ICPPSimpleKernel.h new file mode 100644 index 0000000000..105de397a2 --- /dev/null +++ b/arm_compute/core/CPP/ICPPSimpleKernel.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_ICPPSIMPLEKERNEL_H__ +#define __ARM_COMPUTE_ICPPSIMPLEKERNEL_H__ + +#include "arm_compute/core/CPP/ICPPKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for simple C++ kernels having 1 tensor input and 1 tensor output */ +class ICPPSimpleKernel : public ICPPKernel +{ +public: + /** Constructor */ + ICPPSimpleKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + ICPPSimpleKernel(const ICPPSimpleKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + ICPPSimpleKernel &operator=(const ICPPSimpleKernel &) = delete; + /** Allow instances of this class to be moved */ + ICPPSimpleKernel(ICPPSimpleKernel &&) = default; + /** Allow instances of this class to be moved */ + ICPPSimpleKernel &operator=(ICPPSimpleKernel &&) = default; + /** Default destructor */ + ~ICPPSimpleKernel() = default; + +protected: + /** Configure the kernel + * + * @param[in] input Source tensor. + * @param[out] output Destination tensor. + * @param[in] num_elems_processed_per_iteration Number of processed elements per iteration. + * @param[in] border_undefined (Optional) True if the border mode is undefined. False if it's replicate or constant. + * @param[in] border_size (Optional) Size of the border. + */ + void configure(const ITensor *input, ITensor *output, unsigned int num_elems_processed_per_iteration, bool border_undefined = false, const BorderSize &border_size = BorderSize()); + +protected: + const ITensor *_input; + ITensor *_output; +}; +} +#endif /*__ARM_COMPUTE_ICPPSIMPLEKERNEL_H__ */ diff --git a/arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h b/arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h new file mode 100644 index 0000000000..0866d4ee57 --- /dev/null +++ b/arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CPPCORNERCANDIDATESKERNEL_H__ +#define __ARM_COMPUTE_CPPCORNERCANDIDATESKERNEL_H__ + +#include "arm_compute/core/IArray.h" +#include "arm_compute/core/NEON/INEKernel.h" + +#include <cstdint> +#include <mutex> + +namespace arm_compute +{ +class ITensor; +using IImage = ITensor; + +/** CPP kernel to compute the corner candidates + */ +class CPPCornerCandidatesKernel : public INEKernel +{ +public: + /** Default constructor */ + CPPCornerCandidatesKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CPPCornerCandidatesKernel(const CPPCornerCandidatesKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CPPCornerCandidatesKernel &operator=(const CPPCornerCandidatesKernel &) = delete; + /** Allow instances of this class to be moved */ + CPPCornerCandidatesKernel(CPPCornerCandidatesKernel &&) = default; + /** Allow instances of this class to be moved */ + CPPCornerCandidatesKernel &operator=(CPPCornerCandidatesKernel &&) = default; + /** Default destructor */ + ~CPPCornerCandidatesKernel() = default; + + /** Setup the kernel parameters + * + * @param[in] input Source image (Harris score).
Format supported: F32 + * @param[out] output Destination array of InternalKeypoint + * @param[out] num_corner_candidates Number of corner candidates + */ + void configure(const IImage *input, InternalKeypoint *output, int32_t *num_corner_candidates); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + int32_t *_num_corner_candidates; /**< Number of corner candidates */ + std::mutex _corner_candidates_mutex; /**< Mutex to prevent race conditions */ + const IImage *_input; /**< Source image - Harris score */ + InternalKeypoint *_output; /**< Array of InternalKeypoint */ +}; +} //namespace arm_compute +#endif /* __ARM_COMPUTE_CPPCORNERCANDIDATESKERNEL_H__ */
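The kernel above collects thresholded Harris responses from several worker threads into one shared output array, which is why it carries a mutex. An illustrative sketch of that collection pattern (the types here are stand-ins; the real kernel writes InternalKeypoint entries into a preallocated array):

#include <cstdint>
#include <mutex>
#include <vector>

struct Candidate
{
    int32_t x;
    int32_t y;
    float   strength;
};

// Threads scanning disjoint window regions call this for every pixel whose
// Harris score exceeds the threshold; appends are serialized with the mutex.
void append_candidate(std::vector<Candidate> &out, std::mutex &m, Candidate c, float threshold)
{
    if(c.strength > threshold)
    {
        std::lock_guard<std::mutex> lock(m); // protect the shared array
        out.push_back(c);
    }
}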
diff --git a/arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h b/arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h new file mode 100644 index 0000000000..bcb3026959 --- /dev/null +++ b/arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CPPDETECTIONWINDOWNONMAXIMASUPPRESSIONKERNEL_H__ +#define __ARM_COMPUTE_CPPDETECTIONWINDOWNONMAXIMASUPPRESSIONKERNEL_H__ + +#include "arm_compute/core/IArray.h" +#include "arm_compute/core/IHOG.h" +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +/** CPP kernel to perform in-place computation of Euclidean distance on IDetectionWindowArray + * + * @note This kernel is meant to be used alongside HOG or other object detection algorithms to perform a non-maxima suppression on a + * IDetectionWindowArray + */ +class CPPDetectionWindowNonMaximaSuppressionKernel : public ICPPKernel +{ +public: + /** Default constructor */ + CPPDetectionWindowNonMaximaSuppressionKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CPPDetectionWindowNonMaximaSuppressionKernel(const CPPDetectionWindowNonMaximaSuppressionKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CPPDetectionWindowNonMaximaSuppressionKernel &operator=(const CPPDetectionWindowNonMaximaSuppressionKernel &) = delete; + /** Allow instances of this class to be moved */ + CPPDetectionWindowNonMaximaSuppressionKernel(CPPDetectionWindowNonMaximaSuppressionKernel &&) = default; + /** Allow instances of this class to be moved */ + CPPDetectionWindowNonMaximaSuppressionKernel &operator=(CPPDetectionWindowNonMaximaSuppressionKernel &&) = default; + /** Initialise the kernel's input, output and the Euclidean minimum distance + * + * @attention If @ref CLDetectionWindowArray is passed to the kernel, the map() and unmap() methods of @ref CLDetectionWindowArray must be called respectively before and after + * the run() method of @ref CPPDetectionWindowNonMaximaSuppressionKernel + * + * @param[in, out] input_output Input/Output array of @ref DetectionWindow + * @param[in] min_distance Radial Euclidean distance for non-maxima suppression + */ + void configure(IDetectionWindowArray *input_output, float min_distance); + + // Inherited methods overridden: + void run(const Window &window) override; + bool is_parallelisable() const override; + +private: + IDetectionWindowArray *_input_output; + float _min_distance; +}; +} + +#endif /* __ARM_COMPUTE_CPPDETECTIONWINDOWNONMAXIMASUPPRESSIONKERNEL_H__ */
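A hedged usage sketch of this kernel. The use of arm_compute/runtime/Array.h and DetectionWindowArray is an assumption drawn from the library's runtime layer, and the 2.0f radius is illustrative; with a CL-backed array the buffer would additionally need map()/unmap() around run(), per the @attention note above.

#include "arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h"
#include "arm_compute/runtime/Array.h"

using namespace arm_compute;

// Suppress detection windows that lie within 2 pixels of a stronger detection
void suppress(DetectionWindowArray &windows)
{
    CPPDetectionWindowNonMaximaSuppressionKernel nms;
    nms.configure(&windows, 2.0f /* min_distance */);
    // A CPP kernel is normally dispatched through a scheduler; calling run()
    // directly with the kernel's own window is the simplest single-threaded use.
    nms.run(nms.window());
}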
diff --git a/arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h b/arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h new file mode 100644 index 0000000000..b7a7d9ff9f --- /dev/null +++ b/arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CPPSORTEUCLIDEANDISTANCEKERNEL_H__ +#define __ARM_COMPUTE_CPPSORTEUCLIDEANDISTANCEKERNEL_H__ + +#include "arm_compute/core/CPP/ICPPKernel.h" +#include "arm_compute/core/IArray.h" + +#include <cstdint> +#include <mutex> + +namespace arm_compute +{ +/** CPP kernel to perform sorting and Euclidean distance */ +class CPPSortEuclideanDistanceKernel : public ICPPKernel +{ +public: + /** Default constructor */ + CPPSortEuclideanDistanceKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CPPSortEuclideanDistanceKernel(const CPPSortEuclideanDistanceKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CPPSortEuclideanDistanceKernel &operator=(const CPPSortEuclideanDistanceKernel &) = delete; + /** Allow instances of this class to be moved */ + CPPSortEuclideanDistanceKernel(CPPSortEuclideanDistanceKernel &&) = default; + /** Allow instances of this class to be moved */ + CPPSortEuclideanDistanceKernel &operator=(CPPSortEuclideanDistanceKernel &&) = default; + /** Initialise the kernel's input, output and minimum Euclidean distance + * + * @param[in,out] in_out Input internal keypoints. Marked as out as the kernel writes 0 in the strength member. + * @param[out] output Output keypoints. + * @param[in] num_corner_candidates Pointer to the number of corner candidates in the input array + * @param[in] min_distance Radial Euclidean distance to use + */ + void configure(InternalKeypoint *in_out, IKeyPointArray *output, const int32_t *num_corner_candidates, float min_distance); + + // Inherited methods overridden: + void run(const Window &window) override; + bool is_parallelisable() const override; + +private: + const int32_t *_num_corner_candidates; /**< Number of corner candidates */ + float _min_distance; /**< Radial Euclidean distance */ + InternalKeypoint *_in_out; /**< Source array of InternalKeypoint */ + IKeyPointArray *_output; /**< Destination keypoint array */ +}; + +} // namespace arm_compute +#endif /* __ARM_COMPUTE_CPPSORTEUCLIDEANDISTANCEKERNEL_H__ */ diff --git a/arm_compute/core/Coordinates.h b/arm_compute/core/Coordinates.h new file mode 100644 index 0000000000..3a99abbd74 --- /dev/null +++ b/arm_compute/core/Coordinates.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_COORDINATES_H__ +#define __ARM_COMPUTE_COORDINATES_H__ + +#include "arm_compute/core/Dimensions.h" +#include "arm_compute/core/Error.h" + +#include <algorithm> +#include <array> +#include <cstddef> + +namespace arm_compute +{ +/** Coordinates of an item */ +class Coordinates : public Dimensions<int> +{ +public: + /** Constructor to initialize the coordinates. + * + * @param[in] coords Values to initialize the dimensions. + */ + template <typename... Ts> + constexpr Coordinates(Ts... coords) + : Dimensions{ coords... } + { + } + /** Allow instances of this class to be copy constructed */ + constexpr Coordinates(const Coordinates &) = default; + /** Allow instances of this class to be copied */ + Coordinates &operator=(const Coordinates &) = default; + /** Allow instances of this class to be move constructed */ + constexpr Coordinates(Coordinates &&) = default; + /** Allow instances of this class to be moved */ + Coordinates &operator=(Coordinates &&) = default; + /** Default destructor */ + ~Coordinates() = default; +}; +} +#endif /*__ARM_COMPUTE_COORDINATES_H__*/ diff --git a/arm_compute/core/Dimensions.h b/arm_compute/core/Dimensions.h new file mode 100644 index 0000000000..b080435b69 --- /dev/null +++ b/arm_compute/core/Dimensions.h @@ -0,0 +1,178 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_DIMENSIONS_H__ +#define __ARM_COMPUTE_DIMENSIONS_H__ + +#include "arm_compute/core/Error.h" + +#include <algorithm> +#include <array> +#include <functional> +#include <numeric> + +namespace arm_compute +{ +/* Constant value used to indicate maximum dimensions of a Window, TensorShape and Coordinates */ +constexpr size_t MAX_DIMS = 6; + +/** Dimensions with dimensionality */ +template <typename T> +class Dimensions +{ +public: + /** Maximum number of dimensions the tensor can have */ + static constexpr size_t num_max_dimensions = MAX_DIMS; + + /** Constructor to initialize the tensor shape. + * + * @param[in] dims Values to initialize the dimensions. + */ + template <typename... Ts> + Dimensions(Ts... dims) + : _id{ { dims...
} }, _num_dimensions{ sizeof...(dims) } + { + } + + /** Allow instances of this class to be copy constructed */ + Dimensions(const Dimensions &) = default; + + /** Allow instances of this class to be copied */ + Dimensions &operator=(const Dimensions &) = default; + + /** Allow instances of this class to be move constructed */ + Dimensions(Dimensions &&) = default; + + /** Allow instances of this class to be moved */ + Dimensions &operator=(Dimensions &&) = default; + + /** Accessor to set the value of one of the dimensions. + * + * @param[in] dimension Dimension for which the value is set. + * @param[in] value Value to be set for the dimension. + */ + void set(size_t dimension, T value) + { + ARM_COMPUTE_ERROR_ON(dimension >= num_max_dimensions); + _id[dimension] = value; + _num_dimensions = std::max(_num_dimensions, dimension + 1); + } + /** Alias to access the size of the first dimension */ + T x() const + { + return _id[0]; + } + /** Alias to access the size of the second dimension */ + T y() const + { + return _id[1]; + } + /** Alias to access the size of the third dimension */ + T z() const + { + return _id[2]; + } + /** Generic accessor to get the size of any dimension + * + * @note Precondition: dimension < Dimensions::num_max_dimensions + * + * @param[in] dimension Dimension of the wanted size + * + * @return The size of the requested dimension. + */ + T operator[](size_t dimension) const + { + ARM_COMPUTE_ERROR_ON(dimension >= num_max_dimensions); + return _id[dimension]; + } + /** Returns the effective dimensionality of the tensor */ + unsigned int num_dimensions() const + { + return _num_dimensions; + } + + /** Set number of dimensions */ + void set_num_dimensions(size_t num_dimensions) + { + _num_dimensions = num_dimensions; + } + + /** Collapse dimensions. + * + * @param[in] n Number of dimensions to collapse into @p first. + * @param[in] first Dimensions into which the following @p n are collapsed. + */ + void collapse(size_t n, size_t first = 0) + { + ARM_COMPUTE_ERROR_ON(first + n > _id.size()); + + // Collapse dimensions into the first + _id[first] = std::accumulate(_id.cbegin() + first, _id.cbegin() + first + n, 1, std::multiplies<T>()); + // Shift the remaining dimensions down + std::copy(_id.begin() + first + n, _id.end(), _id.begin() + first + 1); + // Reduce the number of dimensions + _num_dimensions -= n - 1; + // Fill the now empty dimensions with zero + std::fill(_id.begin() + _num_dimensions, _id.end(), 0); + } + + /** Returns a read/write iterator that points to the first element in the dimension array. */ + typename std::array<T, num_max_dimensions>::iterator begin() + { + return _id.begin(); + } + /** Returns a read-only (constant) iterator that points to the first element in the dimension array. */ + typename std::array<T, num_max_dimensions>::const_iterator begin() const + { + return _id.begin(); + } + /** Returns a read-only (constant) iterator that points to the first element in the dimension array. */ + typename std::array<T, num_max_dimensions>::const_iterator cbegin() const + { + return begin(); + } + /** Returns a read/write iterator that points one past the last element in the dimension array. */ + typename std::array<T, num_max_dimensions>::iterator end() + { + return _id.end(); + } + /** Returns a read-only (constant) iterator that points one past the last element in the dimension array. */ + typename std::array<T, num_max_dimensions>::const_iterator end() const + { + return _id.end(); + } + /** Returns a read-only (constant) iterator that points one past the last element in the dimension array.
*/ + typename std::array<T, num_max_dimensions>::const_iterator cend() const + { + return end(); + } + +protected: + /** Protected destructor. */ + ~Dimensions() = default; + + std::array<T, num_max_dimensions> _id; + size_t _num_dimensions{ 0 }; +}; +} +#endif /*__ARM_COMPUTE_DIMENSIONS_H__*/
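A short usage example built only from classes in this patch. Dimensions itself cannot be instantiated directly (its destructor is protected, it is a base for Coordinates, TensorShape, Steps and Strides), so Coordinates is used here; collapse(2) multiplies the first two dimensions together and shifts the rest down.

#include "arm_compute/core/Coordinates.h"
#include <cassert>

using namespace arm_compute;

int main()
{
    Coordinates c(2, 3, 4); // x = 2, y = 3, z = 4
    assert(c.num_dimensions() == 3);

    // Collapse the first two dimensions into one: (2, 3, 4) -> (6, 4)
    c.collapse(2);
    assert(c.x() == 6 && c.y() == 4);
    assert(c.num_dimensions() == 2);
    return 0;
}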
diff --git a/arm_compute/core/Error.h b/arm_compute/core/Error.h new file mode 100644 index 0000000000..c4c452bacf --- /dev/null +++ b/arm_compute/core/Error.h @@ -0,0 +1,160 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_ERROR_H__ +#define __ARM_COMPUTE_ERROR_H__ + +/** Print the given message then throw an std::runtime_error. + * + * @param[in] ... Message to display before aborting. + */ +#define ARM_COMPUTE_ERROR(...) ::arm_compute::error(__func__, __FILE__, __LINE__, __VA_ARGS__) // NOLINT + +/** Print the given message then throw an std::runtime_error. + * + * @param[in] func Function in which the error occurred. + * @param[in] file File in which the error occurred. + * @param[in] line Line in which the error occurred. + * @param[in] ... Message to display before aborting. + */ +#define ARM_COMPUTE_ERROR_LOC(func, file, line, ...) ::arm_compute::error(func, file, line, __VA_ARGS__) // NOLINT + +/** To avoid unused variables warnings + * + * This is useful if for example a variable is only used + * in debug builds and generates a warning in release builds. + * + * @param[in] var Variable which is unused + */ +#define ARM_COMPUTE_UNUSED(var) (void)(var) + +#ifdef ARM_COMPUTE_DEBUG_ENABLED +/** Print the given message + * + * @param[in] ... Message to display + */ +#define ARM_COMPUTE_INFO(...) ::arm_compute::debug(__func__, __FILE__, __LINE__, __VA_ARGS__) // NOLINT +/** If the condition is true, the given message is printed + * + * @param[in] cond Condition to evaluate. + * @param[in] ... Message to print if cond is true. + */ +#define ARM_COMPUTE_INFO_ON_MSG(cond, ...) \ + do \ + { \ + if(cond) \ + { \ + ARM_COMPUTE_INFO(__VA_ARGS__); \ + } \ + } while(0) +#else /* ARM_COMPUTE_DEBUG_ENABLED */ +#define ARM_COMPUTE_INFO_ON_MSG(cond, ...) +#define ARM_COMPUTE_INFO(...) +#endif /* ARM_COMPUTE_DEBUG_ENABLED */ + +#ifdef ARM_COMPUTE_ASSERTS_ENABLED +/** If the condition is true, the given message is printed and an exception is thrown + * + * @param[in] cond Condition to evaluate. + * @param[in] ... Message to print if cond is true. + */ +#define ARM_COMPUTE_ERROR_ON_MSG(cond, ...) \ + do \ + { \ + if(cond) \ + { \ + ARM_COMPUTE_ERROR(__VA_ARGS__); \ + } \ + } while(0) + +/** If the condition is true, the given message is printed and an exception is thrown + * + * @param[in] cond Condition to evaluate. + * @param[in] func Function in which the error occurred. + * @param[in] file File in which the error occurred. + * @param[in] line Line in which the error occurred. + * @param[in] ... Message to print if cond is true. + */ +#define ARM_COMPUTE_ERROR_ON_LOC_MSG(cond, func, file, line, ...) \ + do \ + { \ + if(cond) \ + { \ + ARM_COMPUTE_ERROR_LOC(func, file, line, __VA_ARGS__); \ + } \ + } while(0) + +/** If the condition is true, the given message is printed and an exception is thrown, otherwise value is returned + * + * @param[in] cond Condition to evaluate. + * @param[in] val Value to be returned. + * @param[in] msg Message to print if cond is true. + */ +#define ARM_COMPUTE_CONST_ON_ERROR(cond, val, msg) (cond) ? throw std::logic_error(msg) : val; +#else /* ARM_COMPUTE_ASSERTS_ENABLED */ +#define ARM_COMPUTE_ERROR_ON_MSG(cond, ...) +#define ARM_COMPUTE_ERROR_ON_LOC_MSG(cond, func, file, line, ...) +#define ARM_COMPUTE_CONST_ON_ERROR(cond, val, msg) val +#endif /* ARM_COMPUTE_ASSERTS_ENABLED */ + +/** If the condition is true then an error message is printed and an exception thrown + * + * @param[in] cond Condition to evaluate + */ +#define ARM_COMPUTE_ERROR_ON(cond) \ + ARM_COMPUTE_ERROR_ON_MSG(cond, #cond) + +/** If the condition is true then an error message is printed and an exception thrown + * + * @param[in] cond Condition to evaluate + * @param[in] func Function in which the error occurred. + * @param[in] file File in which the error occurred. + * @param[in] line Line in which the error occurred. + */ +#define ARM_COMPUTE_ERROR_ON_LOC(cond, func, file, line) \ + ARM_COMPUTE_ERROR_ON_LOC_MSG(cond, func, file, line, #cond)
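A hedged example of how these macros are typically used (the function and message are illustrative). With ARM_COMPUTE_ASSERTS_ENABLED defined the check prints a message and throws; in release builds it compiles away entirely, so the checked parameter should still be marked as used.

#include "arm_compute/core/Error.h"

void set_num_bins(unsigned int num_bins)
{
    ARM_COMPUTE_ERROR_ON_MSG(num_bins == 0, "Number of bins must be greater than 0");
    ARM_COMPUTE_UNUSED(num_bins); // avoids -Wunused warnings when asserts are disabled
    // ... use num_bins ...
}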
+ +namespace arm_compute +{ +/** Print an error message then throw an std::runtime_error + * + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] msg Message to display before aborting. + * @param[in] ... Variable number of arguments of the message. + */ +[[noreturn]] void error(const char *function, const char *file, const int line, const char *msg, ...); + +/** Print a debug message + * + * @param[in] function Function in which the message was printed. + * @param[in] file Name of the file where the message was printed. + * @param[in] line Line on which the message was printed. + * @param[in] msg Message to display. + * @param[in] ... Variable number of arguments of the message. + */ +void debug(const char *function, const char *file, const int line, const char *msg, ...); +} + +#endif /* __ARM_COMPUTE_ERROR_H__ */ diff --git a/arm_compute/core/FixedPoint.h b/arm_compute/core/FixedPoint.h new file mode 100644 index 0000000000..925b4949a3 --- /dev/null +++ b/arm_compute/core/FixedPoint.h @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_FIXEDPOINT_H__ +#define __ARM_COMPUTE_FIXEDPOINT_H__ + +#include <cstdint> + +namespace arm_compute +{ +using qint8_t = int8_t; /**< 8 bit fixed point scalar value */ +using qint16_t = int16_t; /**< 16 bit fixed point scalar value */ +using qint32_t = int32_t; /**< 32 bit fixed point scalar value */ + +/** 8 bit fixed point scalar saturating shift left + * + * @param[in] a 8 bit fixed point input + * @param[in] shift Shift amount + * + * @return The result of the 8 bit fixed point shift. The result is saturated in case of overflow + */ +qint8_t sqshl_qs8(qint8_t a, int shift); + +/** 8 bit fixed point scalar absolute value + * + * @param[in] a 8 bit fixed point input + * + * @return The result of the 8 bit fixed point absolute value + */ +qint8_t sabs_qs8(qint8_t a); + +/** 8 bit fixed point scalar add + * + * @param[in] a First 8 bit fixed point input + * @param[in] b Second 8 bit fixed point input + * + * @return The result of the 8 bit fixed point addition + */ +qint8_t sadd_qs8(qint8_t a, qint8_t b); + +/** 8 bit fixed point scalar saturating add + * + * @param[in] a First 8 bit fixed point input + * @param[in] b Second 8 bit fixed point input + * + * @return The result of the 8 bit fixed point addition. The result is saturated in case of overflow + */ +qint8_t sqadd_qs8(qint8_t a, qint8_t b); + +/** 16 bit fixed point scalar saturating add + * + * @param[in] a First 16 bit fixed point input + * @param[in] b Second 16 bit fixed point input + * + * @return The result of the 16 bit fixed point addition.
The result is saturated in case of overflow + */ +qint16_t sqadd_qs16(qint16_t a, qint16_t b); + +/** 8 bit fixed point scalar subtraction + * + * @param[in] a First 8 bit fixed point input + * @param[in] b Second 8 bit fixed point input + * + * @return The result of the 8 bit fixed point subtraction + */ +qint8_t ssub_qs8(qint8_t a, qint8_t b); + +/** 8 bit fixed point scalar saturating subtraction + * + * @param[in] a First 8 bit fixed point input + * @param[in] b Second 8 bit fixed point input + * + * @return The result of the 8 bit fixed point subtraction. The result is saturated in case of overflow + */ +qint8_t sqsub_qs8(qint8_t a, qint8_t b); + +/** 8 bit fixed point scalar multiply + * + * @param[in] a First 8 bit fixed point input + * @param[in] b Second 8 bit fixed point input + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8 bit fixed point multiplication. + */ +qint8_t smul_qs8(qint8_t a, qint8_t b, int fixed_point_position); + +/** 8 bit fixed point scalar saturating multiply + * + * @param[in] a First 8 bit fixed point input + * @param[in] b Second 8 bit fixed point input + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8 bit fixed point multiplication. The result is saturated in case of overflow + */ +qint8_t sqmul_qs8(qint8_t a, qint8_t b, int fixed_point_position); + +/** 8 bit fixed point scalar multiply long (widening multiply returning a 16 bit result) + * + * @param[in] a First 8 bit fixed point input + * @param[in] b Second 8 bit fixed point input + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8 bit fixed point multiply long. + */ +qint16_t sqmull_qs8(qint8_t a, qint8_t b, int fixed_point_position); + +/** 16 bit fixed point scalar saturating multiply + * + * @param[in] a First 16 bit fixed point input + * @param[in] b Second 16 bit fixed point input + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 16 bit fixed point multiplication. The result is saturated in case of overflow + */ +qint16_t sqmul_qs16(qint16_t a, qint16_t b, int fixed_point_position); + +/** 8 bit fixed point scalar inverse square root + * + * @param[in] a 8 bit fixed point input + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8 bit fixed point inverse square root. + */ +qint8_t sinvsqrt_qs8(qint8_t a, int fixed_point_position); + +/** 8 bit fixed point scalar division + * + * @param[in] a First 8 bit fixed point input + * @param[in] b Second 8 bit fixed point input + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8 bit fixed point division. + */ +qint8_t sdiv_qs8(qint8_t a, qint8_t b, int fixed_point_position); + +/** 8 bit fixed point scalar saturating exponential + * + * @param[in] a 8 bit fixed point input + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8 bit fixed point exponential.
+ */ +qint8_t sqexp_qs8(qint8_t a, int fixed_point_position); + +/** 8 bit fixed point scalar logarithm + * + * @param[in] a 8 bit fixed point input + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8 bit fixed point logarithm. + */ +qint8_t slog_qs8(qint8_t a, int fixed_point_position); + +/** Convert an 8 bit fixed point to float + * + * @param[in] a Input to convert + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the conversion 8 bit fixed point -> float + */ +float scvt_f32_qs8(qint8_t a, int fixed_point_position); + +/** Convert a float to 8 bit fixed point + * + * @param[in] a Input to convert + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the conversion float -> 8 bit fixed point + */ +qint8_t scvt_qs8_f32(float a, int fixed_point_position); + +/** Convert a 16 bit fixed point to float + * + * @param[in] a Input to convert + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the conversion 16 bit fixed point -> float + */ +float scvt_f32_qs16(qint16_t a, int fixed_point_position); + +/** Convert a float to 16 bit fixed point + * + * @param[in] a Input to convert + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the conversion float -> 16 bit fixed point + */ +qint16_t scvt_qs16_f32(float a, int fixed_point_position); + +/** Scalar saturating move and narrow. + * + * @param[in] a Input to convert to 8 bit fixed point + * + * @return The narrowing conversion to 8 bit + */ +qint8_t sqmovn_qs16(qint16_t a); +} +#include "arm_compute/core/FixedPoint.inl" +#endif /* __ARM_COMPUTE_FIXEDPOINT_H__ */
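A small worked example of the Q-format arithmetic these functions implement, assuming the declarations above are linked against the library. With fixed_point_position = 4 (Q3.4), 1.0f is represented as 16, and a multiply is a widened product, rounded, then shifted back down.

#include "arm_compute/core/FixedPoint.h"
#include <cassert>

using namespace arm_compute;

int main()
{
    const int fpp = 4; // Q3.4: 4 fractional bits

    qint8_t a = scvt_qs8_f32(1.5f, fpp); // 1.5 * 16 + 0.5 -> 24
    assert(a == 24);

    // 1.5 * 1.5 = 2.25 -> (24 * 24 + 8) >> 4 = 36, i.e. 36 / 16.0 = 2.25
    qint8_t p = sqmul_qs8(a, a, fpp);
    assert(p == 36);
    assert(scvt_f32_qs8(p, fpp) == 2.25f);
    return 0;
}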
diff --git a/arm_compute/core/FixedPoint.inl b/arm_compute/core/FixedPoint.inl new file mode 100644 index 0000000000..4263a6f00d --- /dev/null +++ b/arm_compute/core/FixedPoint.inl @@ -0,0 +1,252 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include <cmath> +#include <limits> + +namespace +{ +template <typename TpSat, typename TpIn> +inline TpSat saturate_convert(TpIn a) +{ + if(a > std::numeric_limits<TpSat>::max()) + { + a = std::numeric_limits<TpSat>::max(); + } + if(a < std::numeric_limits<TpSat>::min()) + { + a = std::numeric_limits<TpSat>::min(); + } + return static_cast<TpSat>(a); +} +} // namespace + +namespace arm_compute +{ +inline qint8_t sqshl_qs8(qint8_t a, int shift) +{ + qint16_t tmp = static_cast<qint16_t>(a) << shift; + // Saturate the result in case of overflow and cast to qint8_t + return saturate_convert<qint16_t, qint8_t>(tmp); +} + +inline qint8_t sabs_qs8(qint8_t a) +{ + return (a < 0) ? -a : a; +} + +inline qint8_t sadd_qs8(qint8_t a, qint8_t b) +{ + return a + b; +} + +inline qint8_t sqadd_qs8(qint8_t a, qint8_t b) +{ + // We need to store the temporary result in qint16_t otherwise we cannot evaluate the overflow + qint16_t tmp = (static_cast<qint16_t>(a) + static_cast<qint16_t>(b)); + + // Saturate the result in case of overflow and cast to qint8_t + return saturate_convert<qint16_t, qint8_t>(tmp); +} + +inline qint16_t sqadd_qs16(qint16_t a, qint16_t b) +{ + // We need to store the temporary result in qint32_t otherwise we cannot evaluate the overflow + qint32_t tmp = (static_cast<qint32_t>(a) + static_cast<qint32_t>(b)); + + // Saturate the result in case of overflow and cast to qint16_t + return saturate_convert<qint32_t, qint16_t>(tmp); +} + +inline qint8_t ssub_qs8(qint8_t a, qint8_t b) +{ + return a - b; +} + +inline qint8_t sqsub_qs8(qint8_t a, qint8_t b) +{ + // We need to store the temporary result in qint16_t otherwise we cannot evaluate the overflow + qint16_t tmp = static_cast<qint16_t>(a) - static_cast<qint16_t>(b); + + // Saturate the result in case of overflow and cast to qint8_t + return saturate_convert<qint16_t, qint8_t>(tmp); +} + +inline qint8_t smul_qs8(qint8_t a, qint8_t b, int fixed_point_position) +{ + const qint16_t round_up_const = (1 << (fixed_point_position - 1)); + + qint16_t tmp = static_cast<qint16_t>(a) * static_cast<qint16_t>(b); + + // Rounding up + tmp += round_up_const; + + return static_cast<qint8_t>(tmp >> fixed_point_position); +} + +inline qint8_t sqmul_qs8(qint8_t a, qint8_t b, int fixed_point_position) +{ + const qint16_t round_up_const = (1 << (fixed_point_position - 1)); + + qint16_t tmp = static_cast<qint16_t>(a) * static_cast<qint16_t>(b); + + // Rounding up + tmp += round_up_const; + + return saturate_convert<qint16_t, qint8_t>(tmp >> fixed_point_position); +} + +inline qint16_t sqmul_qs16(qint16_t a, qint16_t b, int fixed_point_position) +{ + const qint32_t round_up_const = (1 << (fixed_point_position - 1)); + + qint32_t tmp = static_cast<qint32_t>(a) * static_cast<qint32_t>(b); + + // Rounding up + tmp += round_up_const; + + return saturate_convert<qint32_t, qint16_t>(tmp >> fixed_point_position); +} + +inline qint16_t sqmull_qs8(qint8_t a, qint8_t b, int fixed_point_position) +{ + const qint16_t round_up_const = (1 << (fixed_point_position - 1)); + + qint16_t tmp = static_cast<qint16_t>(a) * static_cast<qint16_t>(b); + + // Rounding up + tmp += round_up_const; + + return tmp >> fixed_point_position; +} + +inline qint8_t sinvsqrt_qs8(qint8_t a, int fixed_point_position) +{ + qint8_t shift = 8 - (fixed_point_position + (__builtin_clz(a) - 24)); + + qint8_t const_three = (3 << fixed_point_position); + qint8_t temp = shift < 0 ? (a << -shift) : (a >> shift); + qint8_t x2 = temp; + + // Three Newton-Raphson iterations: x_{n+1} = x_n * (3 - a * x_n^2) / 2 + for(int i = 0; i < 3; i++) + { + qint8_t three_minus_dx = ssub_qs8(const_three, smul_qs8(temp, smul_qs8(x2, x2, fixed_point_position), fixed_point_position)); + x2 = (smul_qs8(x2, three_minus_dx, fixed_point_position) >> 1); + } + + temp = shift < 0 ?
(x2 << (-shift >> 1)) : (x2 >> (shift >> 1)); + + return temp; +} + +inline qint8_t sdiv_qs8(qint8_t a, qint8_t b, int fixed_point_position) +{ + qint16_t temp = a << fixed_point_position; + return static_cast<qint8_t>(temp / b); +} + +inline qint8_t sqexp_qs8(qint8_t a, int fixed_point_position) +{ + // Constants + qint8_t const_one = (1 << fixed_point_position); + qint8_t ln2 = ((0x58 >> (6 - fixed_point_position)) + 1) >> 1; + qint8_t inv_ln2 = (((0x38 >> (6 - fixed_point_position)) + 1) >> 1) | const_one; + qint8_t A = ((0x7F >> (6 - fixed_point_position)) + 1) >> 1; + qint8_t B = ((0x3F >> (6 - fixed_point_position)) + 1) >> 1; + qint8_t C = ((0x16 >> (6 - fixed_point_position)) + 1) >> 1; + qint8_t D = ((0x05 >> (6 - fixed_point_position)) + 1) >> 1; + + // Polynomial expansion + int dec_a = (sqmul_qs8(a, inv_ln2, fixed_point_position) >> fixed_point_position); + qint8_t alpha = sabs_qs8(sqsub_qs8(a, sqmul_qs8(ln2, sqshl_qs8(dec_a, fixed_point_position), fixed_point_position))); + qint8_t sum = sqadd_qs8(sqmul_qs8(alpha, D, fixed_point_position), C); + sum = sqadd_qs8(sqmul_qs8(alpha, sum, fixed_point_position), B); + sum = sqadd_qs8(sqmul_qs8(alpha, sum, fixed_point_position), A); + sum = sqmul_qs8(alpha, sum, fixed_point_position); + sum = sqadd_qs8(sum, const_one); + + return (dec_a < 0) ? (sum >> -dec_a) : sqshl_qs8(sum, dec_a); +} + +inline qint8_t slog_qs8(qint8_t a, int fixed_point_position) +{ + // Constants + qint8_t const_one = (1 << fixed_point_position); + qint8_t ln2 = (0x58 >> (7 - fixed_point_position)); + qint8_t A = (0x5C >> (7 - fixed_point_position - 1)); + qint8_t B = -(0x56 >> (7 - fixed_point_position)); + qint8_t C = (0x29 >> (7 - fixed_point_position)); + qint8_t D = -(0x0A >> (7 - fixed_point_position)); + + if((const_one == a) || (a < 0)) + { + return 0; + } + else if(a < const_one) + { + return -slog_qs8(sdiv_qs8(const_one, a, fixed_point_position), fixed_point_position); + } + + // Remove even powers of 2 + qint8_t shift_val = 31 - __builtin_clz(a >> fixed_point_position); + a >>= shift_val; + a = ssub_qs8(a, const_one); + + // Polynomial expansion + auto sum = sqadd_qs8(sqmul_qs8(a, D, fixed_point_position), C); + sum = sqadd_qs8(sqmul_qs8(a, sum, fixed_point_position), B); + sum = sqadd_qs8(sqmul_qs8(a, sum, fixed_point_position), A); + sum = sqmul_qs8(a, sum, fixed_point_position); + + return smul_qs8(sadd_qs8(sum, shift_val << fixed_point_position), ln2, fixed_point_position); +} + +inline float scvt_f32_qs8(qint8_t a, int fixed_point_position) +{ + return static_cast<float>(a) / (1 << fixed_point_position); +} + +inline qint8_t scvt_qs8_f32(float a, int fixed_point_position) +{ + // round_nearest_integer(a * 2^(fixed_point_position)) + return static_cast<qint8_t>(static_cast<float>(a) * (1 << fixed_point_position) + 0.5f); +} + +inline float scvt_f32_qs16(qint16_t a, int fixed_point_position) +{ + return static_cast<float>(a) / (1 << fixed_point_position); +} + +inline qint16_t scvt_qs16_f32(float a, int fixed_point_position) +{ + // round_nearest_integer(a * 2^(fixed_point_position)) + return static_cast<qint16_t>(static_cast<float>(a) * (1 << fixed_point_position) + 0.5f); +} + +inline qint8_t sqmovn_qs16(qint16_t a) +{ + // Saturate the result in case of overflow and cast to qint8_t + return saturate_convert<qint16_t, qint8_t>(a); +} +} diff --git a/arm_compute/core/HOGInfo.h b/arm_compute/core/HOGInfo.h new file mode 100644 index 0000000000..654629306d --- /dev/null +++ b/arm_compute/core/HOGInfo.h @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_HOGINFO_H__ +#define __ARM_COMPUTE_HOGINFO_H__ + +#include "arm_compute/core/Size2D.h" +#include "arm_compute/core/Types.h" + +#include + +namespace arm_compute +{ +/** Store the HOG's metadata */ +class HOGInfo +{ +public: + /** Default constructor */ + HOGInfo(); + /** Default destructor */ + virtual ~HOGInfo() = default; + /** Allow instances of this class to be copy constructed */ + HOGInfo(const HOGInfo &) = default; + /** Allow instances of this class to be copied */ + HOGInfo &operator=(const HOGInfo &) = default; + /** Allow instances of this class to be move constructed */ + HOGInfo(HOGInfo &&) = default; + /** Allow instances of this class to be moved */ + HOGInfo &operator=(HOGInfo &&) = default; + /** Constructor + * + * @param[in] cell_size Cell size in pixels + * @param[in] block_size Block size in pixels. Must be a multiple of cell_size. + * @param[in] detection_window_size Detection window size in pixels. Must be a multiple of block_size and block_stride. + * @param[in] block_stride Distance in pixels between 2 consecutive blocks along the x and y direction. Must be a multiple of cell size + * @param[in] num_bins Number of histogram bins for each cell + * @param[in] normalization_type (Optional) Normalization type to use for each block + * @param[in] l2_hyst_threshold (Optional) Threshold used for L2HYS_NORM normalization method + * @param[in] phase_type (Optional) Type of @ref PhaseType + */ + HOGInfo(const Size2D &cell_size, const Size2D &block_size, const Size2D &detection_window_size, const Size2D &block_stride, size_t num_bins, + HOGNormType normalization_type = HOGNormType::L2HYS_NORM, float l2_hyst_threshold = 0.2f, PhaseType phase_type = PhaseType::UNSIGNED); + /** Initialize the metadata structure with the given parameters + * + * @param[in] cell_size Cell size in pixels + * @param[in] block_size Block size in pixels. Must be a multiple of cell_size. + * @param[in] detection_window_size Detection window size in pixels. Must be a multiple of block_size and block_stride. + * @param[in] block_stride Distance in pixels between 2 consecutive blocks along the x and y direction. 
Must be a multiple of cell size + * @param[in] num_bins Number of histogram bins for each cell + * @param[in] normalization_type (Optional) Normalization type to use for each block + * @param[in] l2_hyst_threshold (Optional) Threshold used for L2HYS_NORM normalization method + * @param[in] phase_type (Optional) Type of @ref PhaseType + */ + void init(const Size2D &cell_size, const Size2D &block_size, const Size2D &detection_window_size, const Size2D &block_stride, size_t num_bins, + HOGNormType normalization_type = HOGNormType::L2HYS_NORM, float l2_hyst_threshold = 0.2f, PhaseType phase_type = PhaseType::UNSIGNED); + /** The cell size in pixels + * + * @return The cell size in pixels + */ + const Size2D &cell_size() const; + /** The block size in pixels + * + * @return The block size in pixels + */ + const Size2D &block_size() const; + /** The detection window size in pixels + * + * @return The detection window size in pixels + */ + const Size2D &detection_window_size() const; + /** The block stride in pixels. The block stride is the distance between 2 consecutive blocks + * + * @return The block stride in pixels + */ + const Size2D &block_stride() const; + /** The number of histogram bins for each cell + * + * @return The number of histogram bins for each cell + */ + size_t num_bins() const; + /** The normalization type + * + * @return The normalization type + */ + HOGNormType normalization_type() const; + /** Threshold used for L2HYS_NORM normalization type + * + * @return Threshold used for L2HYS_NORM normalization type + */ + float l2_hyst_threshold() const; + /** The type of @ref PhaseType + * + * @return The type of @ref PhaseType + */ + PhaseType phase_type() const; + /** The size of HOG descriptor + * + * @return The size of HOG descriptor + */ + size_t descriptor_size() const; + /** Calculates the number of cells for each block + * + * @return The Size2D data object which stores the number of cells along the x and y directions + */ + Size2D num_cells_per_block() const; + /** Calculates the number of blocks for the given image size + * + * @param[in] image_size The input image size data object + * + * @return The Size2D data object which stores the number of blocks along the x and y directions + */ + Size2D num_blocks_per_image(const Size2D &image_size) const; + +private: + Size2D _cell_size; + Size2D _block_size; + Size2D _detection_window_size; + Size2D _block_stride; + size_t _num_bins; + HOGNormType _normalization_type; + float _l2_hyst_threshold; + PhaseType _phase_type; + size_t _descriptor_size; +}; +} +#endif /*__ARM_COMPUTE_HOGINFO_H__ */ diff --git a/arm_compute/core/Helpers.h b/arm_compute/core/Helpers.h new file mode 100644 index 0000000000..07318eaf7a --- /dev/null +++ b/arm_compute/core/Helpers.h @@ -0,0 +1,507 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_HELPERS_H__ +#define __ARM_COMPUTE_HELPERS_H__ + +#include "arm_compute/core/CL/CLTypes.h" +#include "arm_compute/core/Coordinates.h" +#include "arm_compute/core/IAccessWindow.h" +#include "arm_compute/core/Steps.h" +#include "arm_compute/core/Strides.h" +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Window.h" +#include <array> +#include <cstddef> +#include <cstdint> +#include <memory> +#include <tuple> +#include <type_traits> +#include <utility> + +namespace arm_compute +{ +class IKernel; +class ITensor; +class ITensorInfo; + +namespace cpp14 +{ +template <typename T> +struct _Unique_if +{ + typedef std::unique_ptr<T> _Single_object; +}; + +template <typename T> +struct _Unique_if<T[]> +{ + typedef std::unique_ptr<T[]> _Unknown_bound; +}; + +template <typename T, size_t N> +struct _Unique_if<T[N]> +{ + typedef void _Known_bound; +}; + +template <typename T, typename... Args> +typename _Unique_if<T>::_Single_object +make_unique(Args &&... args) +{ + return std::unique_ptr<T>(new T(std::forward<Args>(args)...)); +} + +template <typename T> +typename _Unique_if<T>::_Unknown_bound +make_unique(size_t n) +{ + typedef typename std::remove_extent<T>::type U; + return std::unique_ptr<T>(new U[n]()); +} + +template <typename T, typename... Args> +typename _Unique_if<T>::_Known_bound +make_unique(Args &&...) = delete; +} + +template <typename T> +struct enable_bitwise_ops +{ + static constexpr bool value = false; +}; + +template <typename T> +typename std::enable_if<enable_bitwise_ops<T>::value, T>::type operator&(T lhs, T rhs) +{ + using underlying_type = typename std::underlying_type<T>::type; + return static_cast<T>(static_cast<underlying_type>(lhs) & static_cast<underlying_type>(rhs)); +} + +namespace traits +{ +/** Check if a type T is contained in a tuple Tuple of types */ +template <typename T, typename Tuple> +struct is_contained; + +template <typename T> +struct is_contained<T, std::tuple<>> : std::false_type +{ +}; + +template <typename T, typename... Ts> +struct is_contained<T, std::tuple<T, Ts...>> : std::true_type +{ +}; + +template <typename T, typename U, typename... Ts> +struct is_contained<T, std::tuple<U, Ts...>> : is_contained<T, std::tuple<Ts...>> +{ +}; +} + +/** Computes bilinear interpolation using the pointer to the top-left pixel and the pixel's distance between + * the real coordinates and the smallest following integer coordinates. + * + * @param[in] pixel_ptr Pointer to the top-left pixel value. Format: Single channel U8 + * @param[in] stride Stride to access the bottom-left and bottom-right pixel values + * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer + * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer + * + * @note dx and dy must be in the range [0, 1.0] + * + * @return The bilinear interpolated pixel value + */ +inline uint8_t delta_bilinear_c1u8(const uint8_t *pixel_ptr, size_t stride, float dx, float dy);
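The header only declares delta_bilinear_c1u8 (the library's definition lives elsewhere, in an .inl file). As an illustration of the standard bilinear weighting the documentation describes, here is a self-contained reimplementation sketch; the function name is hypothetical:

#include <cstddef>
#include <cstdint>

// pixel_ptr points at the top-left pixel; stride moves one row down.
inline uint8_t delta_bilinear_sketch(const uint8_t *pixel_ptr, size_t stride, float dx, float dy)
{
    const float dx1 = 1.0f - dx;
    const float dy1 = 1.0f - dy;

    const float tl = pixel_ptr[0];          // top-left
    const float tr = pixel_ptr[1];          // top-right
    const float bl = pixel_ptr[stride];     // bottom-left
    const float br = pixel_ptr[stride + 1]; // bottom-right

    // Weight each corner by the area of the opposite sub-rectangle
    return static_cast<uint8_t>(tl * dx1 * dy1 + tr * dx * dy1 + bl * dx1 * dy + br * dx * dy);
}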
+/** Return the pixel at (x,y) using bilinear interpolation. The image must be single channel U8 + * + * @warning Only works if the iterator was created with an IImage + * + * @param[in] first_pixel_ptr Pointer to the first pixel of a single channel U8 image. + * @param[in] stride Stride in bytes of the image. + * @param[in] x X position of the wanted pixel + * @param[in] y Y position of the wanted pixel + * + * @return The pixel at (x, y) using bilinear interpolation. + */ +inline uint8_t pixel_bilinear_c1u8(const uint8_t *first_pixel_ptr, size_t stride, float x, float y); + +/** Return the pixel at (x,y) using bilinear interpolation by clamping when out of borders. The image must be single channel U8 + * + * @warning Only works if the iterator was created with an IImage + * + * @param[in] first_pixel_ptr Pointer to the first pixel of a single channel U8 image. + * @param[in] stride Stride in bytes of the image + * @param[in] width Width of the image + * @param[in] height Height of the image + * @param[in] x X position of the wanted pixel + * @param[in] y Y position of the wanted pixel + * + * @return The pixel at (x, y) using bilinear interpolation. + */ +inline uint8_t pixel_bilinear_c1u8_clamp(const uint8_t *first_pixel_ptr, size_t stride, size_t width, size_t height, float x, float y); + +/** Return the pixel at (x,y) using area interpolation by clamping when out of borders. The image must be single channel U8 + * + * @note The interpolation area depends on the width and height ratio of the input and output images + * @note Currently average of the contributing pixels is calculated + * + * @param[in] first_pixel_ptr Pointer to the first pixel of a single channel U8 image. + * @param[in] stride Stride in bytes of the image + * @param[in] width Width of the image + * @param[in] height Height of the image + * @param[in] wr Width ratio between the input image width and output image width. + * @param[in] hr Height ratio between the input image height and output image height. + * @param[in] x X position of the wanted pixel + * @param[in] y Y position of the wanted pixel + * + * @return The pixel at (x, y) using area interpolation. + */ +inline uint8_t pixel_area_c1u8_clamp(const uint8_t *first_pixel_ptr, size_t stride, size_t width, size_t height, float wr, float hr, int x, int y); + +/** Performs clamping among a lower and upper value. + * + * @param[in] n Value to clamp. + * @param[in] lower Lower threshold. + * @param[in] upper Upper threshold. + * + * @return Clamped value. + */ +template <typename T> +inline T clamp(const T &n, const T &lower, const T &upper) +{ + return std::max(lower, std::min(n, upper)); +} + +/** Base case of for_each. Does nothing. */ +template <typename F> +inline void for_each(F &&) +{ +} + +/** Call the function for each of the arguments + * + * @param[in] func Function to be called + * @param[in] arg Argument passed to the function + * @param[in] args Remaining arguments + */ +template <typename F, typename T, typename... Ts> +inline void for_each(F &&func, T &&arg, Ts &&... args) +{ + func(arg); + for_each(func, args...); +} + +/** Base case of foldl. + * + * @return value. + */ +template <typename F, typename T> +inline T foldl(F &&, const T &value) +{ + return value; +} + +/** Base case of foldl. + * + * @return Function evaluation for value1 and value2 + */ +template <typename F, typename T, typename U> +inline auto foldl(F &&func, T &&value1, U &&value2) -> decltype(func(value1, value2)) +{ + return func(value1, value2); +}
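An illustrative use of these variadic helpers (the demo function is hypothetical): for_each applies a functor to every argument in order, and foldl left-folds a binary functor through them.

#include <iostream>

void variadic_helpers_demo()
{
    using namespace arm_compute;

    // Prints 1, 2 and 3, each on its own line
    for_each([](int v) { std::cout << v << std::endl; }, 1, 2, 3);

    // ((0 + 1) + 2) + 3 = 6
    const int sum = foldl([](int a, int b) { return a + b; }, 0, 1, 2, 3);
    std::cout << sum << std::endl;
}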
+/** Fold left.
+ *
+ * @param[in] func    Function to be called
+ * @param[in] initial Initial value
+ * @param[in] value   Argument passed to the function
+ * @param[in] values  Remaining arguments
+ */
+template <typename F, typename I, typename T, typename... Vs>
+inline I foldl(F &&func, I &&initial, T &&value, Vs &&... values)
+{
+    return foldl(std::forward<F>(func), func(std::forward<I>(initial), std::forward<T>(value)), std::forward<Vs>(values)...);
+}
+
+/** Iterator updated by @ref execute_window_loop for each window element */
+class Iterator
+{
+public:
+    /** Default constructor to create an empty iterator */
+    constexpr Iterator();
+    /** Create a container iterator for the metadata and allocation contained in the ITensor
+     *
+     * @param[in] tensor The tensor to associate to the iterator.
+     * @param[in] window The window which will be used to iterate over the tensor.
+     */
+    Iterator(const ITensor *tensor, const Window &window);
+
+    /** Increment the iterator along the specified dimension by the step value associated with that dimension.
+     *
+     * @warning It is the caller's responsibility to call increment(dimension+1) when reaching the end of a dimension, the iterator will not check for overflow.
+     *
+     * @note When incrementing a dimension 'n' the coordinates of all the dimensions in the range (0,n-1) are reset. For example if you iterate over a 2D image, every time you change row (dimension 1), the iterator for the width (dimension 0) is reset to its start.
+     *
+     * @param[in] dimension Dimension to increment
+     */
+    void increment(size_t dimension);
+
+    /** Return the offset in bytes from the first element to the current position of the iterator
+     *
+     * @return The current position of the iterator in bytes relative to the first element.
+     */
+    constexpr int offset() const;
+
+    /** Return a pointer to the current pixel.
+     *
+     * @warning Only works if the iterator was created with an ITensor.
+     *
+     * @return equivalent to buffer() + offset()
+     */
+    constexpr uint8_t *ptr() const;
+
+    /** Move the iterator back to the beginning of the specified dimension.
+     *
+     * @param[in] dimension Dimension to reset
+     */
+    void reset(size_t dimension);
+
+private:
+    uint8_t *_ptr;
+
+    class Dimension
+    {
+    public:
+        constexpr Dimension()
+            : _dim_start(0), _stride(0)
+        {
+        }
+
+        int _dim_start;
+        int _stride;
+    };
+
+    std::array<Dimension, Coordinates::num_max_dimensions> _dims;
+};
+
+/** Iterate through the passed window, automatically adjusting the iterators and calling the lambda_function for each element.
+ * It passes the x and y positions to the lambda_function for each iteration
+ *
+ * @param[in]     w               Window to iterate through.
+ * @param[in]     lambda_function The function of type void(const Coordinates &id) to call at each iteration.
+ *                                Where id represents the absolute coordinates of the item to process.
+ * @param[in,out] iterators       Tensor iterators which will be updated by this function before calling lambda_function.
+ */
+template <typename L, typename... Ts>
+inline void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators);
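A minimal sketch of how this declaration is typically driven (illustrative names; assumes an allocated single-channel U8 tensor):

    void fill_u8_sketch(ITensor &tensor, uint8_t value)
    {
        Window win = calculate_max_window(*tensor.info());
        Iterator it(&tensor, win);
        execute_window_loop(win, [&](const Coordinates &)
        {
            *it.ptr() = value; // points at the element currently visited
        },
        it);
    }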
+/** Update window and padding size for each of the access patterns.
+ *
+ * First the window size is reduced based on all access patterns that are not
+ * allowed to modify the padding of the underlying tensor. Then the padding of
+ * the remaining tensors is increased to match the window.
+ *
+ * @param[in] win      Window that is used by the kernel.
+ * @param[in] patterns Access patterns used to calculate the final window and padding.
+ *
+ * @return True if the window has been changed. Changes to the padding do not
+ *         influence the returned value.
+ */
+template <typename... Ts>
+bool update_window_and_padding(Window &win, Ts &&... patterns)
+{
+    bool window_changed = false;
+
+    for_each([&](const IAccessWindow & w)
+    {
+        window_changed |= w.update_window_if_needed(win);
+    },
+    patterns...);
+
+    bool padding_changed = false;
+
+    for_each([&](const IAccessWindow & w)
+    {
+        padding_changed |= w.update_padding_if_needed(win);
+    },
+    patterns...);
+
+    return window_changed;
+}
+
+/** Calculate the maximum window for a given tensor shape and border setting
+ *
+ * @param[in] info        Tensor info object defining the shape of the object for which the window is created.
+ * @param[in] steps       (Optional) Number of elements processed for each step.
+ * @param[in] skip_border (Optional) If true exclude the border region from the window.
+ * @param[in] border_size (Optional) Border size.
+ *
+ * @return The maximum window the kernel can be executed on.
+ */
+Window calculate_max_window(const ITensorInfo &info, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize());
+
+/** Calculate the maximum window used by a horizontal kernel for a given tensor shape and border setting
+ *
+ * @param[in] info        Tensor info object defining the shape of the object for which the window is created.
+ * @param[in] steps       (Optional) Number of elements processed for each step.
+ * @param[in] skip_border (Optional) If true exclude the border region from the window.
+ * @param[in] border_size (Optional) Border size. The border region will be excluded from the window.
+ *
+ * @return The maximum window the kernel can be executed on.
+ */
+Window calculate_max_window_horizontal(const ITensorInfo &info, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize());
+
+/** Calculate the maximum window for a given tensor shape and border setting. The window will also include the border.
+ *
+ * @param[in] info        Tensor info object defining the shape of the object for which the window is created.
+ * @param[in] steps       (Optional) Number of elements processed for each step.
+ * @param[in] border_size (Optional) Border size. The border region will be included in the window.
+ *
+ * @return The maximum window the kernel can be executed on.
+ */
+Window calculate_max_enlarged_window(const ITensorInfo &info, const Steps &steps = Steps(), BorderSize border_size = BorderSize());
+
+/** Intersect multiple valid regions.
+ *
+ * @param[in] regions Valid regions.
+ *
+ * @return Intersection of all regions.
+ */
+template <typename... Ts>
+ValidRegion intersect_valid_regions(Ts &&... regions)
+{
+    auto intersect = [](const ValidRegion & r1, const ValidRegion & r2) -> ValidRegion
+    {
+        ValidRegion region;
+
+        for(size_t d = 0; d < std::min(r1.anchor.num_dimensions(), r2.anchor.num_dimensions()); ++d)
+        {
+            region.anchor.set(d, std::max(r1.anchor[d], r2.anchor[d]));
+        }
+
+        for(size_t d = 0; d < std::min(r1.shape.num_dimensions(), r2.shape.num_dimensions()); ++d)
+        {
+            region.shape.set(d, std::min(r1.shape[d], r2.shape[d]));
+        }
+
+        return region;
+    };
+
+    return foldl(intersect, std::forward<Ts>(regions)...);
+}
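How kernels typically combine these helpers at configure time (a sketch; the tensor pointers and the step size are illustrative):

    void configure_sketch(ITensor *input, ITensor *output)
    {
        constexpr unsigned int num_elems_processed_per_iteration = 16;

        Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));

        AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration);
        AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);

        // Shrinks the window and/or grows the padding until both accesses fit.
        update_window_and_padding(win, input_access, output_access);
    }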
+/** Create a strides object based on the provided strides and the tensor dimensions.
+ *
+ * @param[in] info          Tensor info object providing the shape of the tensor for unspecified strides.
+ * @param[in] stride_x      Stride to be used in X dimension (in bytes).
+ * @param[in] fixed_strides Strides to be used in higher dimensions starting at Y (in bytes).
+ *
+ * @return Strides object based on the specified strides. Missing strides are
+ *         calculated based on the tensor shape and the strides of lower dimensions.
+ */
+template <typename T, typename... Ts>
+inline Strides compute_strides(const ITensorInfo &info, T stride_x, Ts &&... fixed_strides)
+{
+    const TensorShape &shape = info.tensor_shape();
+
+    // Create strides object
+    Strides strides(stride_x, fixed_strides...);
+
+    for(size_t i = 1 + sizeof...(Ts); i < info.num_dimensions(); ++i)
+    {
+        strides.set(i, shape[i - 1] * strides[i - 1]);
+    }
+
+    return strides;
+}
+
+/** Create a strides object based on the tensor dimensions.
+ *
+ * @param[in] info Tensor info object used to compute the strides.
+ *
+ * @return Strides object based on element size and tensor shape.
+ */
+template <typename... Ts>
+inline Strides compute_strides(const ITensorInfo &info)
+{
+    return compute_strides(info, info.element_size());
+}
+
+/** Auto initialize the tensor info (shape, number of channels, data type and fixed point position) if the current assignment is empty.
+ *
+ * @param[in,out] info                 Tensor info used to check and assign.
+ * @param[in]     shape                New shape.
+ * @param[in]     num_channels         New number of channels.
+ * @param[in]     data_type            New data type
+ * @param[in]     fixed_point_position New fixed point position
+ *
+ * @return True if the tensor info has been initialized
+ */
+bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, int fixed_point_position);
+
+/** Set the shape to the specified value if the current assignment is empty.
+ *
+ * @param[in,out] info  Tensor info used to check and assign.
+ * @param[in]     shape New shape.
+ *
+ * @return True if the shape has been changed.
+ */
+bool set_shape_if_empty(ITensorInfo &info, const TensorShape &shape);
+
+/** Set the format, data type and number of channels to the specified value if
+ * the current data type is unknown.
+ *
+ * @param[in,out] info   Tensor info used to check and assign.
+ * @param[in]     format New format.
+ *
+ * @return True if the format has been changed.
+ */
+bool set_format_if_unknown(ITensorInfo &info, Format format);
+
+/** Set the data type and number of channels to the specified value if
+ * the current data type is unknown.
+ *
+ * @param[in,out] info      Tensor info used to check and assign.
+ * @param[in]     data_type New data type.
+ *
+ * @return True if the data type has been changed.
+ */
+bool set_data_type_if_unknown(ITensorInfo &info, DataType data_type);
+
+/** Set the fixed point position to the specified value if
+ * the current fixed point position is 0 and the data type is QS8 or QS16
+ *
+ * @param[in,out] info                 Tensor info used to check and assign.
+ * @param[in]     fixed_point_position New fixed point position
+ *
+ * @return True if the fixed point position has been changed.
+ */
+bool set_fixed_point_position_if_zero(ITensorInfo &info, int fixed_point_position);
+} // namespace arm_compute
+
+#include "arm_compute/core/Helpers.inl"
+#endif /*__ARM_COMPUTE_HELPERS_H__ */
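A worked sketch of the stride derivation above (illustrative; assumes TensorInfo from arm_compute/core/TensorInfo.h):

    // 2D U8 tensor, width 4, height 3, one channel.
    TensorInfo info(TensorShape(4U, 3U), 1, DataType::U8);
    const Strides strides = compute_strides(info);
    // strides[0] = element_size()        = 1 byte
    // strides[1] = shape[0] * strides[0] = 4 bytes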
diff --git a/arm_compute/core/Helpers.inl b/arm_compute/core/Helpers.inl
new file mode 100644
index 0000000000..f885810078
--- /dev/null
+++ b/arm_compute/core/Helpers.inl
@@ -0,0 +1,306 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Validate.h"
+
+#include <cmath>
+#include <numeric>
+
+namespace arm_compute
+{
+inline uint8_t delta_bilinear_c1u8(const uint8_t *pixel_ptr, size_t stride, float dx, float dy)
+{
+    ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr);
+
+    const float dx1 = 1.0f - dx;
+    const float dy1 = 1.0f - dy;
+
+    const float a00 = *pixel_ptr;
+    const float a01 = *(pixel_ptr + 1);
+    const float a10 = *(pixel_ptr + stride);
+    const float a11 = *(pixel_ptr + stride + 1);
+
+    const float w1 = dx1 * dy1;
+    const float w2 = dx * dy1;
+    const float w3 = dx1 * dy;
+    const float w4 = dx * dy;
+
+    return a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4;
+}
+
+inline uint8_t pixel_bilinear_c1u8(const uint8_t *first_pixel_ptr, size_t stride, float x, float y)
+{
+    ARM_COMPUTE_ERROR_ON(first_pixel_ptr == nullptr);
+
+    const int32_t xi = x;
+    const int32_t yi = y;
+
+    const float dx = x - xi;
+    const float dy = y - yi;
+
+    return delta_bilinear_c1u8(first_pixel_ptr + xi + yi * stride, stride, dx, dy);
+}
+
+inline uint8_t pixel_bilinear_c1u8_clamp(const uint8_t *first_pixel_ptr, size_t stride, size_t width, size_t height, float x, float y)
+{
+    ARM_COMPUTE_ERROR_ON(first_pixel_ptr == nullptr);
+
+    x = std::max(-1.f, std::min(x, static_cast<float>(width)));
+    y = std::max(-1.f, std::min(y, static_cast<float>(height)));
+
+    const float xi = std::floor(x);
+    const float yi = std::floor(y);
+
+    const float dx = x - xi;
+    const float dy = y - yi;
+
+    return delta_bilinear_c1u8(first_pixel_ptr + static_cast<int32_t>(xi) + static_cast<int32_t>(yi) * stride, stride, dx, dy);
+}
+
+inline uint8_t pixel_area_c1u8_clamp(const uint8_t *first_pixel_ptr, size_t stride, size_t width, size_t height, float wr, float hr, int x, int y)
+{
+    ARM_COMPUTE_ERROR_ON(first_pixel_ptr == nullptr);
+
+    // Calculate sampling position
+    float in_x = (x + 0.5f) * wr - 0.5f;
+    float in_y = (y + 0.5f) * hr - 0.5f;
+
+    // Get bounding box offsets
+    int x_from = std::floor(x * wr - 0.5f - in_x);
+    int y_from = std::floor(y * hr - 0.5f - in_y);
+    int x_to   = std::ceil((x + 1) * wr - 0.5f - in_x);
+    int y_to   = std::ceil((y + 1) * hr - 0.5f - in_y);
+
+    // Clamp position to borders
+    in_x = std::max(-1.f, std::min(in_x, static_cast<float>(width)));
+    in_y = std::max(-1.f, std::min(in_y, static_cast<float>(height)));
+
+    // Clamp bounding box offsets to borders
+    x_from = ((in_x + x_from) < -1) ? -1 : x_from;
+    y_from = ((in_y + y_from) < -1) ? -1 : y_from;
+    x_to = ((in_x + x_to) > width) ? (width - in_x) : x_to;
+    y_to = ((in_y + y_to) > height) ? (height - in_y) : y_to;
+
+    // Get pixel index
+    const int xi = std::floor(in_x);
+    const int yi = std::floor(in_y);
+
+    // Bounding box elements in each dimension
+    const int x_elements = (x_to - x_from + 1);
+    const int y_elements = (y_to - y_from + 1);
+    ARM_COMPUTE_ERROR_ON(x_elements == 0 || y_elements == 0);
+
+    // Sum pixels in area
+    int sum = 0;
+    for(int j = yi + y_from, je = yi + y_to; j <= je; ++j)
+    {
+        const uint8_t *ptr = first_pixel_ptr + j * stride + xi + x_from;
+        sum                = std::accumulate(ptr, ptr + x_elements, sum);
+    }
+
+    // Return average
+    return sum / (x_elements * y_elements);
+}
+
+template <unsigned int dimension>
+struct IncrementIterators
+{
+    template <typename T, typename... Ts>
+    static void unroll(T &&it, Ts &&... iterators)
+    {
+        it.increment(dimension);
+        IncrementIterators<dimension>::unroll(std::forward<Ts>(iterators)...);
+    }
+
+    template <typename T>
+    static void unroll(T &&it)
+    {
+        it.increment(dimension);
+        // End of recursion
+    }
+
+    static void unroll()
+    {
+        // End of recursion
+    }
+};
+
+template <unsigned int dim>
+struct ForEachDimension
+{
+    template <typename L, typename... Ts>
+    static void unroll(const Window &w, Coordinates &id, L &&lambda_function, Ts &&... iterators)
+    {
+        const auto &d = w[dim - 1];
+
+        for(auto v = d.start(); v < d.end(); v += d.step(), IncrementIterators < dim - 1 >::unroll(iterators...))
+        {
+            id.set(dim - 1, v);
+            ForEachDimension < dim - 1 >::unroll(w, id, lambda_function, iterators...);
+        }
+    }
+};
+
+template <>
+struct ForEachDimension<0>
+{
+    template <typename L, typename... Ts>
+    static void unroll(const Window &w, Coordinates &id, L &&lambda_function, Ts &&... iterators)
+    {
+        lambda_function(id);
+    }
+};
+
+template <typename L, typename... Ts>
+inline void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators)
+{
+    w.validate();
+
+    Coordinates id;
+    ForEachDimension<Coordinates::num_max_dimensions>::unroll(w, id, std::forward<L>(lambda_function), std::forward<Ts>(iterators)...);
+}
+
+inline constexpr Iterator::Iterator()
+    : _ptr(nullptr), _dims()
+{
+}
+
+inline Iterator::Iterator(const ITensor *tensor, const Window &win)
+    : Iterator()
+{
+    ARM_COMPUTE_ERROR_ON(tensor == nullptr);
+    const ITensorInfo *info = tensor->info();
+    ARM_COMPUTE_ERROR_ON(info == nullptr);
+    const Strides &strides = info->strides_in_bytes();
+
+    _ptr = tensor->buffer() + info->offset_first_element_in_bytes();
+
+    // Initialize the stride for each dimension and calculate the position of the first element of the iteration:
+    for(unsigned int n = 0; n < info->num_dimensions(); ++n)
+    {
+        _dims[n]._stride = win[n].step() * strides[n];
+        std::get<0>(_dims)._dim_start += strides[n] * win[n].start();
+    }
+
+    // Copy the starting point to all the dimensions:
+    for(unsigned int n = 1; n < Coordinates::num_max_dimensions; ++n)
+    {
+        _dims[n]._dim_start = std::get<0>(_dims)._dim_start;
+    }
+
+    ARM_COMPUTE_ERROR_ON_WINDOW_DIMENSIONS_GTE(win, info->num_dimensions());
+}
+
+inline void Iterator::increment(const size_t dimension)
+{
+    ARM_COMPUTE_ERROR_ON(dimension >= Coordinates::num_max_dimensions);
+
+    _dims[dimension]._dim_start += _dims[dimension]._stride;
+
+    for(unsigned int n = 0; n < dimension; ++n)
+    {
+        _dims[n]._dim_start = _dims[dimension]._dim_start;
+    }
+}
+
+inline constexpr int Iterator::offset() const
+{
+    return _dims.at(0)._dim_start;
+}
+
+inline constexpr uint8_t *Iterator::ptr() const
+{
+    return _ptr + _dims.at(0)._dim_start;
+}
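An illustrative trace of the bookkeeping above for a 2D window of width 3 (step 1) on a tensor with a row stride of 16 bytes (values are hypothetical):

    // increment(0): offset() 0 -> 1 -> 2                 (along the row)
    // increment(1): dimension 1 advances by 16 and dimension 0 is reset,
    //               so offset() continues 16 -> 17 -> 18 on the next row.
    // execute_window_loop issues exactly this pattern of increments for
    // every iterator passed to it.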
+inline void Iterator::reset(const size_t dimension)
+{
+    ARM_COMPUTE_ERROR_ON(dimension >= Coordinates::num_max_dimensions - 1);
+
+    _dims[dimension]._dim_start = _dims[dimension + 1]._dim_start;
+
+    for(unsigned int n = 0; n < dimension; ++n)
+    {
+        _dims[n]._dim_start = _dims[dimension]._dim_start;
+    }
+}
+
+inline bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, int fixed_point_position)
+{
+    if(info.tensor_shape().total_size() == 0)
+    {
+        info.set_data_type(data_type);
+        info.set_tensor_shape(shape);
+        info.set_num_channels(num_channels);
+        info.set_fixed_point_position(fixed_point_position);
+        return true;
+    }
+
+    return false;
+}
+
+inline bool set_shape_if_empty(ITensorInfo &info, const TensorShape &shape)
+{
+    if(info.tensor_shape().total_size() == 0)
+    {
+        info.set_tensor_shape(shape);
+        return true;
+    }
+
+    return false;
+}
+
+inline bool set_format_if_unknown(ITensorInfo &info, Format format)
+{
+    if(info.data_type() == DataType::UNKNOWN)
+    {
+        info.set_format(format);
+        return true;
+    }
+
+    return false;
+}
+
+inline bool set_data_type_if_unknown(ITensorInfo &info, DataType data_type)
+{
+    if(info.data_type() == DataType::UNKNOWN)
+    {
+        info.set_data_type(data_type);
+        return true;
+    }
+
+    return false;
+}
+
+inline bool set_fixed_point_position_if_zero(ITensorInfo &info, int fixed_point_position)
+{
+    if(info.fixed_point_position() == 0 && (info.data_type() == DataType::QS8 || info.data_type() == DataType::QS16))
+    {
+        info.set_fixed_point_position(fixed_point_position);
+        return true;
+    }
+
+    return false;
+}
+} // namespace arm_compute
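A small sketch of the auto-initialization idiom these helpers enable in kernel configure() methods (tensor-info names are illustrative):

    // Give an unconfigured destination the source's metadata; no-op otherwise.
    void init_output_sketch(const ITensorInfo &src, ITensorInfo &dst)
    {
        auto_init_if_empty(dst, src.tensor_shape(), static_cast<int>(src.num_channels()),
                           src.data_type(), src.fixed_point_position());
    }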
diff --git a/arm_compute/core/IAccessWindow.h b/arm_compute/core/IAccessWindow.h
new file mode 100644
index 0000000000..cf7490d53e
--- /dev/null
+++ b/arm_compute/core/IAccessWindow.h
@@ -0,0 +1,225 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_IACCESS_WINDOW_H__
+#define __ARM_COMPUTE_IACCESS_WINDOW_H__
+
+#include "arm_compute/core/Coordinates.h"
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+
+#include <array>
+
+namespace arm_compute
+{
+class Window;
+class ITensorInfo;
+
+/** Decrease @p required in steps of @p step until it's less than @p available.
+ *
+ * @param[in] required  Number of required bytes.
+ * @param[in] available Number of available bytes.
+ * @param[in] step      Step size used to decrease required bytes.
+ *
+ * @return Largest value not greater than @p available that can be reached from @p required in steps of @p step
+ *
+ **/
+inline int adjust_down(int required, int available, int step)
+{
+    ARM_COMPUTE_ERROR_ON(step <= 0);
+
+    return required - step * ((required - available + step - 1) / step);
+}
+
+/** Increase @p required in steps of @p step until it's greater than @p available.
+ *
+ * @param[in] required  Number of required bytes.
+ * @param[in] available Number of available bytes.
+ * @param[in] step      Step size used to increase required bytes.
+ *
+ * @return Smallest value not smaller than @p available that can be reached from @p required in steps of @p step
+ *
+ **/
+inline int adjust_up(int required, int available, int step)
+{
+    ARM_COMPUTE_ERROR_ON(step <= 0);
+
+    return required + step * ((available - required + step - 1) / step);
+}
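Worked examples of the two helpers above (values are illustrative):

    // adjust_down(10, 7, 4)  = 10 - 4 * ((10 - 7 + 3) / 4)  = 6   (6 <= 7)
    // adjust_up(10, 13, 4)   = 10 + 4 * ((13 - 10 + 3) / 4) = 14  (14 >= 13)
    // Both results differ from `required` by a multiple of `step`, which is
    // what the access windows below rely on when resizing padding.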
+/** Interface describing methods to update access window and padding based on kernel parameters. */
+class IAccessWindow
+{
+public:
+    virtual ~IAccessWindow() = default;
+    /** Shrink the window if padding is not large enough.
+     *
+     * @param[in] window Window used by the kernel.
+     *
+     * @return True if the window has been changed.
+     */
+    virtual bool update_window_if_needed(Window &window) const = 0;
+    /** Increase the padding to be large enough for the window.
+     *
+     * @param[in] window Window used by the kernel.
+     *
+     * @return True if the padding has been changed.
+     */
+    virtual bool update_padding_if_needed(const Window &window) const = 0;
+    /** Compute the valid region based on access pattern and valid region of the inputs.
+     *
+     * @note This method assumes that there is no border.
+     *
+     * @param[in] window             Execution window of the kernel.
+     * @param[in] input_valid_region Combined valid region of all inputs.
+     * @param[in] border_undefined   Undefined borders are excluded from the valid region.
+     * @param[in] border_size        Size of the border around the XY-plane of the tensor.
+     */
+    virtual ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) const = 0;
+};
+
+/** Implementation of a rectangular access pattern. */
+class AccessWindowRectangle : public IAccessWindow
+{
+public:
+    /** Constructor for a rectangular access pattern.
+     *
+     * @note Width and height have to be non-negative.
+     *
+     * @param[in,out] info   Tensor info of the tensor accessed by the kernel.
+     * @param[in]     x      Offset of the access in X direction.
+     * @param[in]     y      Offset of the access in Y direction.
+     * @param[in]     width  Number of elements that are accessed in X direction.
+     * @param[in]     height Number of elements that are accessed in Y direction.
+     */
+    AccessWindowRectangle(ITensorInfo *info, int x, int y, int width, int height)
+        : AccessWindowRectangle(info, x, y, width, height, 1.f, 1.f)
+    {
+    }
+
+    /** Constructor for a rectangular access pattern.
+     *
+     * @note Width, height and scale have to be non-negative.
+     *
+     * @param[in,out] info    Tensor info of the tensor accessed by the kernel.
+     * @param[in]     x       Offset of the access in X direction.
+     * @param[in]     y       Offset of the access in Y direction.
+     * @param[in]     width   Number of elements that are accessed in X direction.
+     * @param[in]     height  Number of elements that are accessed in Y direction.
+     * @param[in]     scale_x Ratio along the X direction between the window used by the execute_window_loop and the rectangular access pattern defined
+     * @param[in]     scale_y Ratio along the Y direction between the window used by the execute_window_loop and the rectangular access pattern defined
+     */
+    AccessWindowRectangle(ITensorInfo *info, int x, int y, int width, int height, float scale_x, float scale_y)
+        : _info(info), _x(x), _y(y), _width(width), _height(height), _scale_x(scale_x), _scale_y(scale_y)
+    {
+        ARM_COMPUTE_ERROR_ON(width < 0);
+        ARM_COMPUTE_ERROR_ON(height < 0);
+        ARM_COMPUTE_ERROR_ON(scale_x < 0);
+        ARM_COMPUTE_ERROR_ON(scale_y < 0);
+    }
+
+    AccessWindowRectangle(const AccessWindowRectangle &) = delete;
+    AccessWindowRectangle &operator=(const AccessWindowRectangle &) = delete;
+    AccessWindowRectangle(AccessWindowRectangle &&)                 = default;
+    AccessWindowRectangle &operator=(AccessWindowRectangle &&) = default;
+    ~AccessWindowRectangle()                                    = default;
+
+    /** Set the valid region based on access pattern, valid region of the inputs and border mode.
+     *
+     * @param[in] window             Execution window of the kernel.
+     * @param[in] input_valid_region Combined valid region of all inputs.
+     * @param[in] border_undefined   (Optional) Undefined borders are excluded from the valid region.
+     * @param[in] border_size        (Optional) Size of the border around the XY-plane of the tensor.
+     */
+    void set_valid_region(const Window &window, const ValidRegion &input_valid_region, bool border_undefined = false, const BorderSize &border_size = BorderSize(0));
+
+    /** Compute the valid region based on access pattern, valid region of the inputs and border mode.
+     *
+     * @note This method assumes that there is no border.
+     *
+     * @param[in] window             Execution window of the kernel.
+     * @param[in] input_valid_region Combined valid region of all inputs.
+     */
+    ValidRegion compute_valid_region(const Window &window, const ValidRegion &input_valid_region) const;
+
+    // Inherited methods overridden:
+
+    /** @note This method assumes that all elements written by the kernel are valid. */
+    ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) const override;
+
+    bool update_window_if_needed(Window &window) const override;
+    bool update_padding_if_needed(const Window &window) const override;
+
+protected:
+    ITensorInfo *_info;
+    int          _x;
+    int          _y;
+    int          _width;
+    int          _height;
+    float        _scale_x;
+    float        _scale_y;
+};
+
+/** Implementation of a column access pattern. */
+class AccessWindowVertical : public AccessWindowRectangle
+{
+public:
+    /** Constructor for a column access pattern.
+     *
+     * @note Height has to be non-negative.
+     *
+     * @param[in,out] info    Tensor info of the tensor accessed by the kernel.
+     * @param[in]     y       Offset of the access in Y direction.
+     * @param[in]     height  Number of elements that are accessed in Y direction.
+     * @param[in]     scale_y Ratio along the Y direction between the window used by the execute_window_loop and the rectangular access pattern defined
+     */
+    AccessWindowVertical(ITensorInfo *info, int y, int height, float scale_y = 1.f)
+        : AccessWindowRectangle(info, 0, y, 1, height, 1.f, scale_y)
+    {
+        ARM_COMPUTE_ERROR_ON(height < 0);
+        ARM_COMPUTE_ERROR_ON(scale_y < 0);
+    }
+};
+
+/** Implementation of a row access pattern. */
+class AccessWindowHorizontal : public AccessWindowRectangle
+{
+public:
+    /** Constructor for a row access pattern.
+     *
+     * @note Width has to be non-negative.
+     *
+     * @param[in,out] info    Tensor info of the tensor accessed by the kernel.
+     * @param[in]     x       Offset of the access in X direction.
+     * @param[in]     width   Number of elements that are accessed in X direction.
+     * @param[in]     scale_x Ratio along the X direction between the window used by the execute_window_loop and the rectangular access pattern defined
+     */
+    AccessWindowHorizontal(ITensorInfo *info, int x, int width, float scale_x = 1.f)
+        : AccessWindowRectangle(info, x, 0, width, 1, scale_x, 1.f)
+    {
+        ARM_COMPUTE_ERROR_ON(width < 0);
+        ARM_COMPUTE_ERROR_ON(scale_x < 0);
+    }
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_IACCESS_WINDOW_H__*/
diff --git a/arm_compute/core/IArray.h b/arm_compute/core/IArray.h
new file mode 100644
index 0000000000..2ed56100cf
--- /dev/null
+++ b/arm_compute/core/IArray.h
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_IARRAY_H__
+#define __ARM_COMPUTE_IARRAY_H__
+
+#include "arm_compute/core/Error.h"
+
+#include <cstddef>
+#include <cstdint>
+
+namespace arm_compute
+{
+class KeyPoint;
+class Coordinates2D;
+class DetectionWindow;
+class Size2D;
+
+/** Array of type T */
+template <class T>
+class IArray
+{
+public:
+    /** Default constructor */
+    IArray()
+        : _num_values(0), _max_size(0) {};
+    /** Constructor: initializes an array which can contain up to max_num_values values
+     *
+     * @param[in] max_num_values Maximum number of values the array will be able to store
+     */
+    IArray(size_t max_num_values)
+        : _num_values(0), _max_size(max_num_values)
+    {
+    }
+    /** Maximum number of values which can be stored in this array
+     *
+     * @return Maximum number of values
+     */
+    size_t max_num_values() const
+    {
+        return _max_size;
+    }
+    /** Default virtual destructor */
+    virtual ~IArray() = default;
+    /** Number of values currently stored in the array
+     *
+     * @return Number of values currently stored in the array or max_num_values + 1 if the array is overflowed.
+     */
+    size_t num_values() const
+    {
+        return _num_values;
+    }
+    /** Append the passed argument to the end of the array if there is room.
+     *
+     * @param[in] val Value to add to the array.
+     *
+     * @return True if the point was successfully added to the array. False if the array is full and the point couldn't be added.
+     */
+    bool push_back(const T &val)
+    {
+        ARM_COMPUTE_ERROR_ON(0 == _max_size);
+        if(_num_values >= max_num_values())
+        {
+            _num_values = max_num_values() + 1;
+            return false;
+        }
+        at(_num_values) = val;
+        _num_values++;
+        return true;
+    }
+    /** Clear all the points from the array. */
+    void clear()
+    {
+        _num_values = 0;
+    }
+    /** Did we lose some values because the array is too small?
+     *
+     * @return True if we tried to add a value using push_back() but there wasn't any room left to store it.
+     *         False if all the values were successfully added to the array.
+     */
+    bool overflow() const
+    {
+        return _num_values > max_num_values();
+    }
+    /** Pointer to the first element of the array
+     *
+     * Other elements of the array can be accessed using buffer()[idx] for 0 <= idx < num_values().
+     *
+     * @return A pointer to the first element of the array
+     */
+    virtual T *buffer() const = 0;
+    /** Reference to the element of the array located at the given index
+     *
+     * @param[in] index Index of the element
+     *
+     * @return A reference to the element of the array located at the given index.
+     */
+    virtual T &at(size_t index) const
+    {
+        ARM_COMPUTE_ERROR_ON(buffer() == nullptr);
+        ARM_COMPUTE_ERROR_ON(index >= max_num_values());
+        return buffer()[index];
+    }
+    /** Resizes the array to contain "num" elements. If "num" is smaller than the current size, the content is reduced to its first "num" elements.
+     * "num" can't be bigger than the maximum number of values which can be stored in this array.
+     *
+     * @param[in] num The new array size in number of elements
+     */
+    void resize(size_t num)
+    {
+        ARM_COMPUTE_ERROR_ON(num > max_num_values());
+        _num_values = num;
+    };
+
+private:
+    size_t _num_values;
+    size_t _max_size;
+};
+using IKeyPointArray        = IArray<KeyPoint>;
+using ICoordinates2DArray   = IArray<Coordinates2D>;
+using IDetectionWindowArray = IArray<DetectionWindow>;
+using ISize2DArray          = IArray<Size2D>;
+using IUInt8Array           = IArray<uint8_t>;
+using IUInt16Array          = IArray<uint16_t>;
+using IUInt32Array          = IArray<uint32_t>;
+using IInt16Array           = IArray<int16_t>;
+using IInt32Array           = IArray<int32_t>;
+using IFloatArray           = IArray<float>;
+}
+#endif /* __ARM_COMPUTE_IARRAY_H__ */
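A usage sketch for the container interface above (assumes a concrete implementation such as an Array<KeyPoint> with fixed capacity, and that Types.h is included for KeyPoint):

    void collect_corner(IKeyPointArray &corners, const KeyPoint &kp)
    {
        if(!corners.push_back(kp))
        {
            // The array never grows: a failed push_back sets the overflow flag.
            ARM_COMPUTE_ERROR_ON(!corners.overflow());
        }
    }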
diff --git a/arm_compute/core/IDistribution.h b/arm_compute/core/IDistribution.h
new file mode 100644
index 0000000000..b57543a3bf
--- /dev/null
+++ b/arm_compute/core/IDistribution.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_IDISTRIBUTION_H__
+#define __ARM_COMPUTE_IDISTRIBUTION_H__
+
+#include <cstddef>
+#include <cstdint>
+
+namespace arm_compute
+{
+/** Interface for distribution objects */
+class IDistribution
+{
+public:
+    /** Default virtual destructor */
+    virtual ~IDistribution() = default;
+    /** Returns the dimensions of the distribution.
+     *
+     * @note This is fixed to 1-dimensional distribution for now.
+     * @return Dimensions of the distribution.
+     */
+    virtual size_t dimensions() const = 0;
+    /** Returns the total size in bytes of the distribution.
+     *
+     * @return Total size of the distribution in bytes.
+     */
+    virtual size_t size() const = 0;
+    /** Returns a pointer to the start of the distribution.
+     * Other elements of the array can be accessed using buffer()[idx] for 0 <= idx < num_bins()
+     *
+     * @return Pointer to the start of the distribution.
+     */
+    virtual uint32_t *buffer() const = 0;
+    /** Clears the distribution by setting every element to zero. */
+    void clear() const;
+};
+}
+#endif /* __ARM_COMPUTE_IDISTRIBUTION_H__ */
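A short sketch of how the interface above is typically consumed (illustrative; any concrete IDistribution, e.g. a 1D histogram distribution, would do):

    void reset_and_read(IDistribution &dist)
    {
        dist.clear();                                            // zero all bins
        const size_t num_bins = dist.size() / sizeof(uint32_t);  // 1D for now
        uint32_t    *bins     = dist.buffer();
        ARM_COMPUTE_ERROR_ON(num_bins != 0 && bins == nullptr);
    }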
diff --git a/arm_compute/core/IDistribution1D.h b/arm_compute/core/IDistribution1D.h
new file mode 100644
index 0000000000..ca8bfc0a7d
--- /dev/null
+++ b/arm_compute/core/IDistribution1D.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_IDISTRIBUTION1D_H__
+#define __ARM_COMPUTE_IDISTRIBUTION1D_H__
+
+#include "arm_compute/core/IDistribution.h"
+
+#include <cstddef>
+#include <cstdint>
+
+namespace arm_compute
+{
+/** 1D Distribution interface */
+class IDistribution1D : public IDistribution
+{
+public:
+    /** Constructor: Creates a 1D Distribution of a consecutive interval [offset, offset + range - 1]
+     * defined by a start offset and valid range, divided equally into num_bins parts.
+     *
+     * @param[in] num_bins The number of bins the distribution is divided in.
+     * @param[in] offset   The start of the values to use.
+     * @param[in] range    The total number of the consecutive values of the distribution interval.
+     */
+    IDistribution1D(size_t num_bins, int32_t offset, uint32_t range);
+    /** Returns the number of bins that the distribution has.
+     *
+     * @return Number of bins of the distribution.
+     */
+    size_t num_bins() const;
+    /** Returns the offset of the distribution.
+     *
+     * @return Offset of the distribution.
+     */
+    int32_t offset() const;
+    /** Returns the range of the distribution.
+     *
+     * @return Range of the distribution.
+     */
+    uint32_t range() const;
+    /** Returns the window of the distribution, which is the range divided by the number of bins.
+     *
+     * @note If the number of bins does not divide the range evenly, the window is invalid.
+     *
+     * @return Window of the distribution.
+     */
+    uint32_t window() const;
+    /** Sets the range of the distribution.
+     *
+     * @param[in] range New range of the distribution to be set.
+     */
+    void set_range(uint32_t range);
+
+    // Inherited methods overridden:
+    size_t size() const override;
+    size_t dimensions() const override;
+
+private:
+    size_t   _num_bins; /**< Number of bins. */
+    int32_t  _offset;   /**< Offset, which indicates the start of the usable values. */
+    uint32_t _range;    /**< The total number of consecutive values of the distribution interval */
+};
+}
+#endif /* __ARM_COMPUTE_IDISTRIBUTION1D_H__ */
diff --git a/arm_compute/core/IHOG.h b/arm_compute/core/IHOG.h
new file mode 100644
index 0000000000..8bf713ae82
--- /dev/null
+++ b/arm_compute/core/IHOG.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_IHOG_H__
+#define __ARM_COMPUTE_IHOG_H__
+
+#include "arm_compute/core/Types.h"
+
+#include <cstddef>
+
+namespace arm_compute
+{
+class HOGInfo;
+/** Interface for HOG data-object */
+class IHOG
+{
+public:
+    /** Interface to be implemented by the child class to return the HOG's metadata
+     *
+     * @return A pointer to the HOG's metadata.
+     */
+    virtual const HOGInfo *info() const = 0;
+    /** Default virtual destructor */
+    virtual ~IHOG() = default;
+    /** Pointer to the first element of the array which stores the linear SVM coefficients of HOG descriptor
+     *
+     * @note Other elements of the array can be accessed using descriptor()[idx] for idx=[0, descriptor_size() - 1]
+     *
+     * @return A pointer to the first element of the array which stores the linear SVM coefficients of HOG descriptor
+     */
+    virtual float *descriptor() const = 0;
+};
+}
+#endif /* __ARM_COMPUTE_IHOG_H__ */
diff --git a/arm_compute/core/IKernel.h b/arm_compute/core/IKernel.h
new file mode 100644
index 0000000000..4f3812b6da
--- /dev/null
+++ b/arm_compute/core/IKernel.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_IKERNEL_H__
+#define __ARM_COMPUTE_IKERNEL_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Window.h"
+
+namespace arm_compute
+{
+/** Common information for all the kernels */
+class IKernel
+{
+public:
+    /** Constructor */
+    IKernel();
+    /** Destructor */
+    virtual ~IKernel() = default;
+    /** Indicates whether or not the kernel is parallelisable
+     *
+     * If the kernel is parallelisable then the window returned by window() can be split into sub-windows
+     * which can then be run in parallel.
+     *
+     * If the kernel is not parallelisable then only the window returned by window() can be passed to run()
+     *
+     * @return True if the kernel is parallelisable
+     */
+    virtual bool is_parallelisable() const;
+    /** The size of the border for that kernel
+     *
+     * @return The width in number of elements of the border.
+     */
+    virtual BorderSize border_size() const;
+    /** The maximum window the kernel can be executed on
+     *
+     * @return The maximum window the kernel can be executed on.
+     */
+    const Window &window() const;
+
+protected:
+    /** Configure the kernel's window
+     *
+     * @param[in] window The maximum window which will be returned by window()
+     */
+    void configure(const Window &window);
+
+private:
+    Window _window;
+};
+}
+#endif /*__ARM_COMPUTE_IKERNEL_H__ */
diff --git a/arm_compute/core/ILut.h b/arm_compute/core/ILut.h
new file mode 100644
index 0000000000..5223aea67a
--- /dev/null
+++ b/arm_compute/core/ILut.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_ILUT_H__
+#define __ARM_COMPUTE_ILUT_H__
+
+#include "arm_compute/core/Types.h"
+
+#include <cstddef>
+#include <cstdint>
+
+namespace arm_compute
+{
+/** Lookup Table object interface. */
+class ILut
+{
+public:
+    /** Default virtual destructor */
+    virtual ~ILut() = default;
+    /** Returns the total number of elements in the LUT.
+     *
+     * @return Total number of elements.
+     */
+    virtual size_t num_elements() const = 0;
+    /** Indicates the offset that needs to be applied to the raw index before performing a lookup in the LUT.
+     *
+     * @return The normalization offset.
+     */
+    virtual uint32_t index_offset() const = 0;
+    /** Returns the total size in bytes of the LUT.
+     *
+     * @return Total size of the LUT in bytes.
+     */
+    virtual size_t size_in_bytes() const = 0;
+    /** Returns the type of the LUT.
+     *
+     * @return The type of the LUT.
+     */
+    virtual DataType type() const = 0;
+    /** Returns a pointer to the start of the LUT.
+     * Other elements of the LUT can be accessed using buffer()[idx] for 0 <= idx < num_elements().
+     *
+     * @return Pointer to the start of the LUT.
+     */
+    virtual uint8_t *buffer() const = 0;
+    /** Clears the LUT by setting every element to zero. */
+    virtual void clear() = 0;
+};
+}
+#endif /* __ARM_COMPUTE_ILUT_H__ */
diff --git a/arm_compute/core/IMultiHOG.h b/arm_compute/core/IMultiHOG.h
new file mode 100644
index 0000000000..e91da75398
--- /dev/null
+++ b/arm_compute/core/IMultiHOG.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_IMULTIHOG_H__
+#define __ARM_COMPUTE_IMULTIHOG_H__
+
+#include "arm_compute/core/IHOG.h"
+
+#include <cstddef>
+
+namespace arm_compute
+{
+/** Interface for storing multiple HOG data-objects */
+class IMultiHOG
+{
+public:
+    /** Default destructor */
+    virtual ~IMultiHOG() = default;
+    /** The number of HOG models stored
+     *
+     * @return The number of HOG models stored
+     */
+    virtual size_t num_models() const = 0;
+    /** Return a pointer to the requested HOG model
+     *
+     * @param[in] index The index of the wanted HOG model.
+     *
+     * @return A pointer to the requested HOG model
+     */
+    virtual IHOG *model(size_t index) = 0;
+    /** Return a const pointer to the requested HOG model
+     *
+     * @param[in] index The index of the wanted HOG model.
+     *
+     * @return A const pointer to the requested HOG model
+     */
+    virtual const IHOG *model(size_t index) const = 0;
+};
+}
+
+#endif /* __ARM_COMPUTE_IMULTIHOG_H__ */
diff --git a/arm_compute/core/IMultiImage.h b/arm_compute/core/IMultiImage.h
new file mode 100644
index 0000000000..6ed3c785ca
--- /dev/null
+++ b/arm_compute/core/IMultiImage.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_IMULTIIMAGE_H__
+#define __ARM_COMPUTE_IMULTIIMAGE_H__
+
+namespace arm_compute
+{
+class ITensor;
+using IImage = ITensor;
+class MultiImageInfo;
+
+/** Interface for multi-planar images */
+class IMultiImage
+{
+public:
+    /** Destructor */
+    virtual ~IMultiImage() = default;
+    /** Interface to be implemented by the child class to return the multi-planar image's metadata
+     *
+     * @return A pointer to the image's metadata.
+     */
+    virtual const MultiImageInfo *info() const = 0;
+    /** Return a pointer to the requested plane of the image.
+     *
+     * @param[in] index The index of the wanted plane.
+     *
+     * @return A pointer to the requested plane
+     */
+    virtual IImage *plane(unsigned int index) = 0;
+    /** Return a constant pointer to the requested plane of the image.
+     *
+     * @param[in] index The index of the wanted plane.
+     *
+     * @return A constant pointer to the requested plane
+     */
+    virtual const IImage *plane(unsigned int index) const = 0;
+};
+}
+#endif /*__ARM_COMPUTE_IMULTIIMAGE_H__ */
diff --git a/arm_compute/core/IPyramid.h b/arm_compute/core/IPyramid.h
new file mode 100644
index 0000000000..e5d7011cf9
--- /dev/null
+++ b/arm_compute/core/IPyramid.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_IPYRAMID_H__
+#define __ARM_COMPUTE_IPYRAMID_H__
+
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/PyramidInfo.h"
+#include "arm_compute/core/Types.h"
+
+#include <cstddef>
+
+namespace arm_compute
+{
+/** Interface for pyramid data-object */
+class IPyramid
+{
+public:
+    /** Default virtual destructor */
+    virtual ~IPyramid() = default;
+    /** Interface to be implemented by the child class to return the Pyramid's metadata
+     *
+     * @return A pointer to the Pyramid's metadata.
+     */
+    virtual const PyramidInfo *info() const = 0;
+    /** Retrieves a level of the pyramid as an ITensor pointer
+     *
+     * @param[in] index The index of the level, such that index is less than levels.
+     *
+     * @return An ITensor pointer
+     */
+    virtual ITensor *get_pyramid_level(size_t index) const = 0;
+};
+}
+
+#endif /* __ARM_COMPUTE_IPYRAMID_H__ */
diff --git a/arm_compute/core/ITensor.h b/arm_compute/core/ITensor.h
new file mode 100644
index 0000000000..202b50a0d8
--- /dev/null
+++ b/arm_compute/core/ITensor.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_ITENSOR_H__
+#define __ARM_COMPUTE_ITENSOR_H__
+
+#include "arm_compute/core/TensorInfo.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class Coordinates;
+
+/** Interface for NEON tensor */
+class ITensor
+{
+public:
+    /** Interface to be implemented by the child class to return the tensor's metadata
+     *
+     * @return A pointer to the tensor's metadata.
+     */
+    virtual ITensorInfo *info() const = 0;
+    /** Interface to be implemented by the child class to return the tensor's metadata
+     *
+     * @return A pointer to the tensor's metadata.
+     */
+    virtual ITensorInfo *info() = 0;
+    /** Default virtual destructor */
+    virtual ~ITensor() = default;
+    /** Interface to be implemented by the child class to return a pointer to CPU memory
+     *
+     * @return A CPU pointer to the beginning of the image's allocation.
+     */
+    virtual uint8_t *buffer() const = 0;
+
+    /** Return a pointer to the element at the passed coordinates
+     *
+     * @param[in] id Coordinates of the element
+     *
+     * @return Pointer to the requested element
+     */
+    inline uint8_t *ptr_to_element(const Coordinates &id) const
+    {
+        return buffer() + info()->offset_element_in_bytes(id);
+    }
+
+    /** Copy the content of another tensor.
+     *
+     * @note The number of dimensions of the source tensor must be less than or equal to those of the destination tensor.
+     *
+     * @note All dimensions of the destination tensor must be greater than or equal to the source tensor ones.
+     *
+     * @note num_channels() and element_size() of both tensors must match.
+     *
+     * @param[in] src Source tensor to copy from.
+     */
+    void copy_from(const ITensor &src);
+
+    /** Print a tensor to a given stream using user defined formatting information
+     *
+     * @param s      Output stream
+     * @param io_fmt Format information
+     */
+    void print(std::ostream &s, IOFormatInfo io_fmt = IOFormatInfo()) const;
+};
+
+using IImage = ITensor;
+}
+#endif /*__ARM_COMPUTE_ITENSOR_H__ */
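A sketch of direct element access through the interface above (illustrative; assumes an allocated single-channel U8 tensor):

    uint8_t read_pixel_sketch(const ITensor &t, int x, int y)
    {
        return *t.ptr_to_element(Coordinates(x, y));
    }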
diff --git a/arm_compute/core/ITensorInfo.h b/arm_compute/core/ITensorInfo.h
new file mode 100644
index 0000000000..bb3ac6e35e
--- /dev/null
+++ b/arm_compute/core/ITensorInfo.h
@@ -0,0 +1,195 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_ITENSORINFO_H__
+#define __ARM_COMPUTE_ITENSORINFO_H__
+
+#include "arm_compute/core/Coordinates.h"
+#include "arm_compute/core/Strides.h"
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Utils.h"
+
+#include <cstddef>
+
+namespace arm_compute
+{
+/** Store the tensor's metadata */
+class ITensorInfo
+{
+public:
+    /** Default virtual destructor */
+    virtual ~ITensorInfo() = default;
+    /** Set the data type to the specified value.
+     *
+     * @warning This resets the format to UNKNOWN.
+     *
+     * @param[in] data_type The new data type.
+     */
+    virtual void set_data_type(DataType data_type) = 0;
+    /** Set the number of channels to the specified value.
+     *
+     * @warning This resets the format to UNKNOWN.
+     *
+     * @param[in] num_channels New number of channels.
+     */
+    virtual void set_num_channels(int num_channels) = 0;
+    /** Set the format of an already initialized tensor.
+     *
+     * @note If the data type has already been configured (i.e. not UNKNOWN) it
+     * must match the new format. If the data type hasn't been configured it will
+     * be based on the format.
+     *
+     * @param[in] format Single-plane format of the tensor.
+     */
+    virtual void set_format(Format format) = 0;
+    /** Set the shape of an already initialized tensor.
+     *
+     * @warning Changing the shape requires recomputing the strides and is
+     * therefore only possible if the tensor hasn't been allocated yet.
+     *
+     * @param[in] shape New tensor shape.
+     */
+    virtual void set_tensor_shape(TensorShape shape) = 0;
+    /** Set the fixed point position to the specified value
+     *
+     * @warning The fixed point position must be set once the data type has been configured
+     *
+     * @param[in] fixed_point_position The new fixed point position
+     */
+    virtual void set_fixed_point_position(int fixed_point_position) = 0;
+    /** Update the offset to the first element and the strides to automatically computed values.
+     *
+     * @note The padding used by this method is really conservative so that the tensor can be used for most functions.
+     *
+     * @return True if the strides or the offset to the first element have changed.
+     */
+    virtual bool auto_padding() = 0;
+    /** Update the offset to the first element, the strides and the total size.
+     *
+     * @note This function can only increase the offset, strides and total size.
+     *
+     * @param[in] padding Padding around the XY plane in number of elements.
+     *
+     * @return True if the strides, offset and total size have changed.
+     */
+    virtual bool extend_padding(const PaddingSize &padding) = 0;
+    /** Return the size of the requested dimension
+     *
+     * @param[in] index Index of the dimension
+     *
+     * @return Dimension of the requested dimension
+     */
+    virtual size_t dimension(size_t index) const = 0;
+    /** The strides in bytes for accessing each dimension of the tensor
+     *
+     * @return Strides in bytes for each tensor dimension
+     */
+    virtual const Strides &strides_in_bytes() const = 0;
+    /** The offset from the beginning of the memory allocation to the first element of the tensor.
+     * This can be used to access efficiently elements in a 2D tensor
+     *
+     * @return The offset in bytes to access the first element of the tensor.
+     */
+    virtual size_t offset_first_element_in_bytes() const = 0;
+    /** The offset in bytes from the beginning of the memory allocation to access the element at position (x, y, z ...)
+     *
+     * @param[in] pos Vector with the coordinates of the element to access.
+     *                The size of this vector must be equal to the number of dimensions of the tensor
+     *
+     * @return Offset in bytes from the beginning of the memory allocation to access the element (x, y, z, ...)
+     */
+    virtual size_t offset_element_in_bytes(const Coordinates &pos) const = 0;
+    /** Fixed point position used when the tensor data type is QS8 or QS16
+     *
+     * @return The fixed point position that expresses the number of bits for the fractional part of the number
+     */
+    virtual int fixed_point_position() const = 0;
+    /** Element size in bytes calculated as data_size() * num_channels()
+     *
+     * @return The size of one element in bytes
+     */
+    virtual size_t element_size() const = 0;
+    /** The number of dimensions of the tensor (rank)
+     *
+     * @return The number of dimensions of the tensor (rank)
+     */
+    virtual size_t num_dimensions() const = 0;
+    /** The number of channels for each tensor element
+     *
+     * @return The number of channels for each tensor element
+     */
+    virtual size_t num_channels() const = 0;
+    /** Size for each dimension of the tensor
+     *
+     * @return A vector with the size for each dimension of the tensor
+     */
+    virtual const TensorShape &tensor_shape() const = 0;
+    /** Data type used for each element of the tensor
+     *
+     * @return Tensor data type
+     */
+    virtual DataType data_type() const = 0;
+    /** Colour format of the image
+     *
+     * @return Colour format of the image
+     */
+    virtual Format format() const = 0;
+    /** Returns the total size of the tensor in bytes.
+     *
+     * @return Total size of the tensor in bytes.
+     */
+    virtual size_t total_size() const = 0;
+    /** Padding of tensor.
+     *
+     * @return Padding.
+     */
+    virtual PaddingSize padding() const = 0;
+    /** Checks if the tensor has been allocated with padding or not.
+     *
+     * @return True if padding is allocated in the tensor, otherwise false.
+     */
+    virtual bool has_padding() const = 0;
+    /** Flag indicating whether the size of the tensor can be changed.
+     *
+     * @return True if the tensor size can be changed.
+     */
+    virtual bool is_resizable() const = 0;
+    /** Set the flag whether the tensor size can be changed.
+     *
+     * @param[in] is_resizable Flag that marks the tensor if it can be changed or not.
+     */
+    virtual void set_is_resizable(bool is_resizable) = 0;
+    /** Valid region of the tensor. All elements in the valid region have defined values, i.e. are not undefined.
+     *
+     * @return The valid region.
+     */
+    virtual ValidRegion valid_region() const = 0;
+    /** Set the valid region of the tensor.
+     *
+     * @param[in] valid_region Valid region to set.
+     */
+    virtual void set_valid_region(ValidRegion valid_region) = 0;
+};
+}
+#endif /*__ARM_COMPUTE_ITENSORINFO_H__ */
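Editorial aside (not part of the patch): the accessors above fully determine the address of an element. For the padded row-major layout used here, the offset returned by offset_element_in_bytes() is

    offset(pos) = offset_first_element_in_bytes() + sum_i pos[i] * strides_in_bytes()[i]

so for a 2D FP32 tensor, element (x, y) lives x * 4 + y * stride_y bytes past the first element, where stride_y >= width * 4 once padding is included.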
diff --git a/arm_compute/core/MultiImageInfo.h b/arm_compute/core/MultiImageInfo.h
new file mode 100644
index 0000000000..6d76953845
--- /dev/null
+++ b/arm_compute/core/MultiImageInfo.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_MULTIIMAGEINFO_H__
+#define __ARM_COMPUTE_MULTIIMAGEINFO_H__
+
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+/** Store the multi-planar image's metadata */
+class MultiImageInfo
+{
+public:
+    /** Constructor */
+    MultiImageInfo();
+    /** Initialize the metadata structure with the given parameters
+     *
+     * @param[in] width  Width of the image (in number of pixels)
+     * @param[in] height Height of the image (in number of pixels)
+     * @param[in] format Colour format of the image.
+     */
+    void init(unsigned int width, unsigned int height, Format format);
+    /** Colour format of the image
+     *
+     * @return Colour format of the image
+     */
+    Format format() const;
+    /** Width in pixels
+     *
+     * @return The width in pixels
+     */
+    unsigned int width() const;
+    /** Height in pixels
+     *
+     * @return The height in pixels
+     */
+    unsigned int height() const;
+
+protected:
+    unsigned int _width;
+    unsigned int _height;
+    Format       _format;
+};
+}
+#endif /*__ARM_COMPUTE_MULTIIMAGEINFO_H__ */
diff --git a/arm_compute/core/NEON/INEKernel.h b/arm_compute/core/NEON/INEKernel.h
new file mode 100644
index 0000000000..3ac8164a51
--- /dev/null
+++ b/arm_compute/core/NEON/INEKernel.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_INEKERNEL_H__
+#define __ARM_COMPUTE_INEKERNEL_H__
+
+#include "arm_compute/core/CPP/ICPPKernel.h"
+
+namespace arm_compute
+{
+using INEKernel = ICPPKernel;
+}
+#endif /*__ARM_COMPUTE_INEKERNEL_H__ */
diff --git a/arm_compute/core/NEON/INESimpleKernel.h b/arm_compute/core/NEON/INESimpleKernel.h
new file mode 100644
index 0000000000..ca25532ef1
--- /dev/null
+++ b/arm_compute/core/NEON/INESimpleKernel.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_INESIMPLEKERNEL_H__
+#define __ARM_COMPUTE_INESIMPLEKERNEL_H__
+
+#include "arm_compute/core/CPP/ICPPSimpleKernel.h"
+
+namespace arm_compute
+{
+using INESimpleKernel = ICPPSimpleKernel;
+}
+#endif /*__ARM_COMPUTE_INESIMPLEKERNEL_H__ */
diff --git a/arm_compute/core/NEON/NEColorConvertHelper.inl b/arm_compute/core/NEON/NEColorConvertHelper.inl
new file mode 100644
index 0000000000..9be7c8a658
--- /dev/null
+++ b/arm_compute/core/NEON/NEColorConvertHelper.inl
@@ -0,0 +1,888 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/IMultiImage.h"
+#include "arm_compute/core/Utils.h"
+
+#include <arm_neon.h>
+
+namespace
+{
+constexpr float red_coef_bt709    = 1.5748f;
+constexpr float green_coef_bt709  = -0.1873f;
+constexpr float green_coef2_bt709 = -0.4681f;
+constexpr float blue_coef_bt709   = 1.8556f;
+
+constexpr float rgb2yuv_bt709_kr = 0.2126f;
+constexpr float rgb2yuv_bt709_kb = 0.0722f;
+// K_g = 1 - K_r - K_b
+constexpr float rgb2yuv_bt709_kg = 0.7152f;
+// C_u = 1 / (2 * (1 - K_b))
+constexpr float rgb2yuv_bt709_cu = 0.5389f;
+// C_v = 1 / (2 * (1 - K_r))
+constexpr float rgb2yuv_bt709_cv = 0.6350f;
+
+inline void convert_uint8x16_to_float32x4x4(const uint8x16_t &in, float32x4x4_t &out)
+{
+    const auto tmp1 = vmovl_u8(vget_low_u8(in));
+    out.val[0] = vcvtq_f32_u32(vmovl_u16(vget_low_u16(tmp1)));
+    out.val[1] = vcvtq_f32_u32(vmovl_u16(vget_high_u16(tmp1)));
+    const auto tmp2 = vmovl_u8(vget_high_u8(in));
+    out.val[2] = vcvtq_f32_u32(vmovl_u16(vget_low_u16(tmp2)));
+    out.val[3] = vcvtq_f32_u32(vmovl_u16(vget_high_u16(tmp2)));
+}
+
+inline void convert_float32x4x3_to_uint8x8x3(const float32x4x3_t &in1, const float32x4x3_t &in2, uint8x8x3_t &out)
+{
+    out.val[0] = vqmovn_u16(vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in1.val[0])),
+                                         vqmovn_u32(vcvtq_u32_f32(in2.val[0]))));
+    out.val[1] = vqmovn_u16(vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in1.val[1])),
+                                         vqmovn_u32(vcvtq_u32_f32(in2.val[1]))));
+    out.val[2] = vqmovn_u16(vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in1.val[2])),
+                                         vqmovn_u32(vcvtq_u32_f32(in2.val[2]))));
+}
+
+inline void convert_float32x4x4_to_unit8x16(const float32x4x4_t &in, uint8x16_t &out)
+{
+    const auto low = vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in.val[0])),
+                                  vqmovn_u32(vcvtq_u32_f32(in.val[1])));
+    const auto high = vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in.val[2])),
+                                   vqmovn_u32(vcvtq_u32_f32(in.val[3])));
+    out = vcombine_u8(vqmovn_u16(low), vqmovn_u16(high));
+}
+
+inline void rgb_to_yuv_calculation(const float32x4_t &rvec, const float32x4_t &gvec, const float32x4_t &bvec,
+                                   float32x4_t &yvec, float32x4_t &uvec, float32x4_t &vvec)
+{
+    /*
+    Y'= 0.2126*R' + 0.7152*G' + 0.0722*B'
+    U'=-0.1146*R' - 0.3854*G' + 0.5000*B'
+    V'= 0.5000*R' - 0.4542*G' - 0.0458*B'
+    */
+    const auto c128 = vdupq_n_f32(128.f);
+
+    // Y = R * K_r + G * (1 - K_r - K_b) + B * K_b
+    yvec = vmulq_n_f32(rvec, rgb2yuv_bt709_kr);
+    yvec = vmlaq_n_f32(yvec, gvec, rgb2yuv_bt709_kg);
+    yvec = vmlaq_n_f32(yvec, bvec, rgb2yuv_bt709_kb);
+
+    // U = (B - Y) / (2 * (1 - K_b))
+    uvec = vsubq_f32(bvec, yvec);
+    uvec = vmlaq_n_f32(c128, uvec, rgb2yuv_bt709_cu);
+
+    // V = (R - Y) / (2 * (1 - K_r))
+    vvec = vsubq_f32(rvec, yvec);
+    vvec = vmlaq_n_f32(c128, vvec, rgb2yuv_bt709_cv);
+}
+
+inline void yuyv_to_rgb_calculation(const float32x4_t &yvec_val, float32x4_t uvec_val, const float32x4_t &yyvec_val,
+                                    float32x4_t vvec_val, unsigned char *output_ptr, const bool alpha)
+{
+    float32x4x3_t rgb1, rgb2;
+
+    // Compute: cb - 128 and cr - 128;
+    const auto c128 = vdupq_n_f32(128.f);
+    uvec_val = vsubq_f32(uvec_val, c128);
+    vvec_val = vsubq_f32(vvec_val, c128);
+
+    // Compute:
+    // r = 0.0000f*f_u + 1.5748f*f_v;
+    // g = -0.1873f*f_u - 0.4681f*f_v;
+    // b = 1.8556f*f_u + 0.0000f*f_v;
+    const auto red  = vmulq_n_f32(vvec_val, red_coef_bt709);
+    const auto blue = vmulq_n_f32(uvec_val, blue_coef_bt709);
+    const auto green = vaddq_f32(vmulq_n_f32(uvec_val, green_coef_bt709),
+                                 vmulq_n_f32(vvec_val, green_coef2_bt709));
+    // Compute the final r,g,b values using y1 for the first texel and y2 for the second one.
+    // The result is stored in two float32x4x3_t which are then converted to one uint8x8x3_t
+    // and written back to memory using the vst3 instruction
+
+    rgb1.val[0] = vaddq_f32(yvec_val, red);
+    rgb1.val[1] = vaddq_f32(yvec_val, green);
+    rgb1.val[2] = vaddq_f32(yvec_val, blue);
+
+    rgb2.val[0] = vaddq_f32(yyvec_val, red);
+    rgb2.val[1] = vaddq_f32(yyvec_val, green);
+    rgb2.val[2] = vaddq_f32(yyvec_val, blue);
+
+    uint8x8x3_t u8_rgb;
+    convert_float32x4x3_to_uint8x8x3(rgb1, rgb2, u8_rgb);
+
+    if(!alpha)
+    {
+        vst3_lane_u8(&output_ptr[0], u8_rgb, 0);
+        vst3_lane_u8(&output_ptr[3], u8_rgb, 4);
+        vst3_lane_u8(&output_ptr[6], u8_rgb, 1);
+        vst3_lane_u8(&output_ptr[9], u8_rgb, 5);
+        vst3_lane_u8(&output_ptr[12], u8_rgb, 2);
+        vst3_lane_u8(&output_ptr[15], u8_rgb, 6);
+        vst3_lane_u8(&output_ptr[18], u8_rgb, 3);
+        vst3_lane_u8(&output_ptr[21], u8_rgb, 7);
+    }
+    else
+    {
+        uint8x8x4_t u8_rgba;
+        u8_rgba.val[0] = u8_rgb.val[0];
+        u8_rgba.val[1] = u8_rgb.val[1];
+        u8_rgba.val[2] = u8_rgb.val[2];
+        u8_rgba.val[3] = vdup_n_u8(255);
+        vst4_lane_u8(&output_ptr[0], u8_rgba, 0);
+        vst4_lane_u8(&output_ptr[4], u8_rgba, 4);
+        vst4_lane_u8(&output_ptr[8], u8_rgba, 1);
+        vst4_lane_u8(&output_ptr[12], u8_rgba, 5);
+        vst4_lane_u8(&output_ptr[16], u8_rgba, 2);
+        vst4_lane_u8(&output_ptr[20], u8_rgba, 6);
+        vst4_lane_u8(&output_ptr[24], u8_rgba, 3);
+        vst4_lane_u8(&output_ptr[28], u8_rgba, 7);
+    }
+}
+
+inline uint8x16x3_t load_rgb(const unsigned char *const ptr, const bool alpha)
+{
+    uint8x16x3_t rgb;
+
+    if(alpha)
+    {
+        const auto tmp = vld4q_u8(ptr);
+        rgb.val[0] = tmp.val[0];
+        rgb.val[1] = tmp.val[1];
+        rgb.val[2] = tmp.val[2];
+    }
+    else
+    {
+        rgb = vld3q_u8(ptr);
+    }
+
+    return rgb;
+}
+
+inline void rgb_to_yuv_conversion(uint8x16x3_t &vec_top, uint8x16x3_t &vec_bottom)
+{
+    // Convert the uint8x16_t to float32x4x4_t
+    float32x4x4_t frvec_top, fgvec_top, fbvec_top;
+    convert_uint8x16_to_float32x4x4(vec_top.val[0], frvec_top);
+    convert_uint8x16_to_float32x4x4(vec_top.val[1], fgvec_top);
+    convert_uint8x16_to_float32x4x4(vec_top.val[2], fbvec_top);
+
+    float32x4x4_t frvec_bottom, fgvec_bottom, fbvec_bottom;
+    convert_uint8x16_to_float32x4x4(vec_bottom.val[0], frvec_bottom);
+    convert_uint8x16_to_float32x4x4(vec_bottom.val[1], fgvec_bottom);
+    convert_uint8x16_to_float32x4x4(vec_bottom.val[2], fbvec_bottom);
+
+    float32x4x4_t fyvec_top, fuvec_top, fvvec_top;
+    float32x4x4_t fyvec_bottom, fuvec_bottom, fvvec_bottom;
+
+    for(auto i = 0; i < 4; ++i)
+    {
+        rgb_to_yuv_calculation(frvec_top.val[i], fgvec_top.val[i], fbvec_top.val[i],
+                               fyvec_top.val[i], fuvec_top.val[i], fvvec_top.val[i]);
+        rgb_to_yuv_calculation(frvec_bottom.val[i], fgvec_bottom.val[i], fbvec_bottom.val[i],
+                               fyvec_bottom.val[i], fuvec_bottom.val[i], fvvec_bottom.val[i]);
+    }
+
+    convert_float32x4x4_to_unit8x16(fyvec_top, vec_top.val[0]);
+    convert_float32x4x4_to_unit8x16(fuvec_top, vec_top.val[1]);
+    convert_float32x4x4_to_unit8x16(fvvec_top, vec_top.val[2]);
+    convert_float32x4x4_to_unit8x16(fyvec_bottom, vec_bottom.val[0]);
+    convert_float32x4x4_to_unit8x16(fuvec_bottom, vec_bottom.val[1]);
+    convert_float32x4x4_to_unit8x16(fvvec_bottom, vec_bottom.val[2]);
+}
+
+inline void store_rgb_to_nv12(const uint8x16_t &rvec_top, const uint8x16_t &gvec_top, const uint8x16_t &bvec_top,
+                              const uint8x16_t &rvec_bottom, const uint8x16_t &gvec_bottom, const uint8x16_t &bvec_bottom,
+                              unsigned char *const __restrict out_y_top, unsigned char *const __restrict out_y_bottom,
+                              unsigned char *const __restrict out_uv)
+{
+    uint8x16x3_t vec_top, vec_bottom;
+    vec_top.val[0] = rvec_top;
+    vec_top.val[1] = gvec_top;
+    vec_top.val[2] = bvec_top;
+    vec_bottom.val[0] = rvec_bottom;
+    vec_bottom.val[1] = gvec_bottom;
+    vec_bottom.val[2] = bvec_bottom;
+
+    rgb_to_yuv_conversion(vec_top, vec_bottom);
+
+    vst1q_u8(out_y_top, vec_top.val[0]);
+    vst1q_u8(out_y_bottom, vec_bottom.val[0]);
+
+    const auto uvec = vuzpq_u8(vec_top.val[1], vec_bottom.val[1]);
+    const auto vvec = vuzpq_u8(vec_top.val[2], vec_bottom.val[2]);
+    const auto utmp = vrhaddq_u8(uvec.val[0], uvec.val[1]);
+    const auto vtmp = vrhaddq_u8(vvec.val[0], vvec.val[1]);
+
+    uint8x8x2_t uvvec;
+    uvvec.val[0] = vhadd_u8(vget_low_u8(utmp), vget_high_u8(utmp));
+    uvvec.val[1] = vhadd_u8(vget_low_u8(vtmp), vget_high_u8(vtmp));
+
+    vst2_u8(out_uv, uvvec);
+}
+
+inline void store_rgb_to_iyuv(const uint8x16_t &rvec_top, const uint8x16_t &gvec_top, const uint8x16_t &bvec_top,
+                              const uint8x16_t &rvec_bottom, const uint8x16_t &gvec_bottom, const uint8x16_t &bvec_bottom,
+                              unsigned char *const __restrict out_y_top, unsigned char *const __restrict out_y_bottom,
+                              unsigned char *const __restrict out_u,
+                              unsigned char *const __restrict out_v)
+{
+    uint8x16x3_t vec_top, vec_bottom;
+    vec_top.val[0] = rvec_top;
+    vec_top.val[1] = gvec_top;
+    vec_top.val[2] = bvec_top;
+    vec_bottom.val[0] = rvec_bottom;
+    vec_bottom.val[1] = gvec_bottom;
+    vec_bottom.val[2] = bvec_bottom;
+
+    rgb_to_yuv_conversion(vec_top, vec_bottom);
+
+    vst1q_u8(out_y_top, vec_top.val[0]);
+    vst1q_u8(out_y_bottom, vec_bottom.val[0]);
+
+    const auto uvvec_top    = vuzpq_u8(vec_top.val[1], vec_top.val[2]);
+    const auto uvvec_bottom = vuzpq_u8(vec_bottom.val[1], vec_bottom.val[2]);
+    const auto uvvec        = vhaddq_u8(vrhaddq_u8(uvvec_top.val[0], uvvec_top.val[1]),
+                                        vrhaddq_u8(uvvec_bottom.val[0], uvvec_bottom.val[1]));
+
+    vst1_u8(out_u, vget_low_u8(uvvec));
+    vst1_u8(out_v, vget_high_u8(uvvec));
+}
+
+inline void store_rgb_to_yuv4(const uint8x16_t &rvec, const uint8x16_t &gvec, const uint8x16_t &bvec,
+                              unsigned char *const __restrict out_y,
+                              unsigned char *const __restrict out_u,
+                              unsigned char *const __restrict out_v)
+{
+    // Convert the uint8x16_t to float32x4x4_t
+    float32x4x4_t frvec, fgvec, fbvec;
+    convert_uint8x16_to_float32x4x4(rvec, frvec);
+    convert_uint8x16_to_float32x4x4(gvec, fgvec);
+    convert_uint8x16_to_float32x4x4(bvec, fbvec);
+
+    float32x4x4_t fyvec, fuvec, fvvec;
+    for(auto i = 0; i < 4; ++i)
+    {
+        rgb_to_yuv_calculation(frvec.val[i], fgvec.val[i], fbvec.val[i],
+                               fyvec.val[i], fuvec.val[i], fvvec.val[i]);
+    }
+
+    uint8x16_t yvec, uvec, vvec;
+    convert_float32x4x4_to_unit8x16(fyvec, yvec);
+    convert_float32x4x4_to_unit8x16(fuvec, uvec);
+    convert_float32x4x4_to_unit8x16(fvvec, vvec);
+
+    vst1q_u8(out_y, yvec);
+    vst1q_u8(out_u, uvec);
+    vst1q_u8(out_v, vvec);
+}
+}
+
+namespace arm_compute
+{
+void colorconvert_rgb_to_rgbx(const void *__restrict input, void *__restrict output, const Window &win)
+{
+    ARM_COMPUTE_ERROR_ON(nullptr == input);
+    ARM_COMPUTE_ERROR_ON(nullptr == output);
+
+    const auto input_ptr  = static_cast<const IImage *__restrict>(input);
+    const auto output_ptr = static_cast<IImage *__restrict>(output);
+
+    Iterator in(input_ptr, win);
+    Iterator out(output_ptr, win);
+
+    execute_window_loop(win, [&](const Coordinates & id)
+    {
+        const auto ta1 = vld3q_u8(in.ptr());
+        uint8x16x4_t ta2;
+        ta2.val[0] = ta1.val[0];
+        ta2.val[1] = ta1.val[1];
+        ta2.val[2] = ta1.val[2];
+        ta2.val[3] = vdupq_n_u8(255);
+        vst4q_u8(out.ptr(), ta2);
+    },
+    in, out);
+}
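Editorial aside (not part of the patch): the scalar form of the BT.709 maths used by the vector routines in this file, with C_u = 1/(2*(1-K_b)) and C_v = 1/(2*(1-K_r)):

    inline void rgb_to_yuv_scalar(float r, float g, float b, float &y, float &u, float &v)
    {
        y = 0.2126f * r + 0.7152f * g + 0.0722f * b; // Y = K_r*R + K_g*G + K_b*B
        u = (b - y) * 0.5389f + 128.f;               // U = C_u * (B - Y) + 128
        v = (r - y) * 0.6350f + 128.f;               // V = C_v * (R - Y) + 128
    }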
+
+void colorconvert_rgbx_to_rgb(const void *input, void *output, const Window &win)
+{
+    ARM_COMPUTE_ERROR_ON(nullptr == input);
+    ARM_COMPUTE_ERROR_ON(nullptr == output);
+
+    const auto input_ptr  = static_cast<const IImage *__restrict>(input);
+    const auto output_ptr = static_cast<IImage *__restrict>(output);
+
+    Iterator in(input_ptr, win);
+    Iterator out(output_ptr, win);
+
+    execute_window_loop(win, [&](const Coordinates & id)
+    {
+        const auto ta1 = vld4q_u8(in.ptr());
+        uint8x16x3_t ta2;
+        ta2.val[0] = ta1.val[0];
+        ta2.val[1] = ta1.val[1];
+        ta2.val[2] = ta1.val[2];
+        vst3q_u8(out.ptr(), ta2);
+    },
+    in, out);
+}
+
+template <bool yuyv, bool alpha>
+void colorconvert_yuyv_to_rgb(const void *__restrict input, void *__restrict output, const Window &win)
+{
+    ARM_COMPUTE_ERROR_ON(nullptr == input);
+    ARM_COMPUTE_ERROR_ON(nullptr == output);
+
+    const auto input_ptr  = static_cast<const IImage *__restrict>(input);
+    const auto output_ptr = static_cast<IImage *__restrict>(output);
+
+    constexpr auto element_size = alpha ? 32 : 24;
+    constexpr auto shift        = yuyv ? 0 : 1;
+
+    Iterator in(input_ptr, win);
+    Iterator out(output_ptr, win);
+
+    execute_window_loop(win, [&](const Coordinates & id)
+    {
+        float32x4x4_t uvec, yvec, vvec, yyvec;
+        const auto ta = vld4q_u8(in.ptr());
+        //ta.val[0] = Y0 Y2 Y4 Y6 ...
+        //ta.val[1] = U0 U2 U4 U6 ...
+        //ta.val[2] = Y1 Y3 Y5 Y7 ...
+        //ta.val[3] = V0 V2 V4 V7 ...
+
+        // Convert the uint8x16x4_t to float32x4x4_t
+        convert_uint8x16_to_float32x4x4(ta.val[0 + shift], yvec);
+        convert_uint8x16_to_float32x4x4(ta.val[1 - shift], uvec);
+        convert_uint8x16_to_float32x4x4(ta.val[2 + shift], yyvec);
+        convert_uint8x16_to_float32x4x4(ta.val[3 - shift], vvec);
+
+        yuyv_to_rgb_calculation(yvec.val[0], uvec.val[0], yyvec.val[0], vvec.val[0], out.ptr() + 0 * element_size, alpha);
+        yuyv_to_rgb_calculation(yvec.val[1], uvec.val[1], yyvec.val[1], vvec.val[1], out.ptr() + 1 * element_size, alpha);
+        yuyv_to_rgb_calculation(yvec.val[2], uvec.val[2], yyvec.val[2], vvec.val[2], out.ptr() + 2 * element_size, alpha);
+        yuyv_to_rgb_calculation(yvec.val[3], uvec.val[3], yyvec.val[3], vvec.val[3], out.ptr() + 3 * element_size, alpha);
+    },
+    in, out);
+}
+
+template <bool uv, bool alpha>
+void colorconvert_nv12_to_rgb(const void *__restrict input, void *__restrict output, const Window &win)
+{
+    ARM_COMPUTE_ERROR_ON(nullptr == input);
+    ARM_COMPUTE_ERROR_ON(nullptr == output);
+    win.validate();
+
+    const auto input_ptr  = static_cast<const IMultiImage *__restrict>(input);
+    const auto output_ptr = static_cast<IImage *__restrict>(output);
+
+    constexpr auto element_size = alpha ? 32 : 24;
+    const auto out_stride       = output_ptr->info()->strides_in_bytes().y();
+    constexpr auto shift        = uv ? 0 : 1;
+
+    // UV's width and height are subsampled
+    Window win_uv(win);
+    win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win.x().step() / 2));
+    win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
+    win_uv.validate();
+
+    Iterator in_y(input_ptr->plane(0), win);
+    Iterator in_uv(input_ptr->plane(1), win_uv);
+    Iterator out(output_ptr, win);
+
+    execute_window_loop(win, [&](const Coordinates & id)
+    {
+        const auto ta_y_top    = vld2q_u8(in_y.ptr());
+        const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
+        const auto ta_uv       = vld2q_u8(in_uv.ptr());
+        //ta_y.val[0] = Y0 Y2 Y4 Y6 ...
+        //ta_y.val[1] = Y1 Y3 Y5 Y7 ...
+        //ta_uv.val[0] = U0 U2 U4 U6 ...
+        //ta_uv.val[1] = V0 V2 V4 V6 ...
+
+        // Convert the uint8x16x4_t to float32x4x4_t
+        float32x4x4_t yvec_top, yyvec_top, yvec_bottom, yyvec_bottom, uvec, vvec;
+        convert_uint8x16_to_float32x4x4(ta_y_top.val[0], yvec_top);
+        convert_uint8x16_to_float32x4x4(ta_y_top.val[1], yyvec_top);
+        convert_uint8x16_to_float32x4x4(ta_y_bottom.val[0], yvec_bottom);
+        convert_uint8x16_to_float32x4x4(ta_y_bottom.val[1], yyvec_bottom);
+        convert_uint8x16_to_float32x4x4(ta_uv.val[0 + shift], uvec);
+        convert_uint8x16_to_float32x4x4(ta_uv.val[1 - shift], vvec);
+
+        yuyv_to_rgb_calculation(yvec_top.val[0], uvec.val[0], yyvec_top.val[0], vvec.val[0], out.ptr() + 0 * element_size, alpha);
+        yuyv_to_rgb_calculation(yvec_top.val[1], uvec.val[1], yyvec_top.val[1], vvec.val[1], out.ptr() + 1 * element_size, alpha);
+        yuyv_to_rgb_calculation(yvec_top.val[2], uvec.val[2], yyvec_top.val[2], vvec.val[2], out.ptr() + 2 * element_size, alpha);
+        yuyv_to_rgb_calculation(yvec_top.val[3], uvec.val[3], yyvec_top.val[3], vvec.val[3], out.ptr() + 3 * element_size, alpha);
+
+        yuyv_to_rgb_calculation(yvec_bottom.val[0], uvec.val[0], yyvec_bottom.val[0], vvec.val[0], out.ptr() + out_stride + 0 * element_size, alpha);
+        yuyv_to_rgb_calculation(yvec_bottom.val[1], uvec.val[1], yyvec_bottom.val[1], vvec.val[1], out.ptr() + out_stride + 1 * element_size, alpha);
+        yuyv_to_rgb_calculation(yvec_bottom.val[2], uvec.val[2], yyvec_bottom.val[2], vvec.val[2], out.ptr() + out_stride + 2 * element_size, alpha);
+        yuyv_to_rgb_calculation(yvec_bottom.val[3], uvec.val[3], yyvec_bottom.val[3], vvec.val[3], out.ptr() + out_stride + 3 * element_size, alpha);
+    },
+    in_y, in_uv, out);
+}
+
+template <bool alpha>
+void colorconvert_iyuv_to_rgb(const void *__restrict input, void *__restrict output, const Window &win)
+{
+    ARM_COMPUTE_ERROR_ON(nullptr == input);
+    ARM_COMPUTE_ERROR_ON(nullptr == output);
+    win.validate();
+
+    const auto input_ptr  = static_cast<const IMultiImage *__restrict>(input);
+    const auto output_ptr = static_cast<IImage *__restrict>(output);
+
+    constexpr auto element_size = alpha ? 32 : 24;
+    const auto out_stride       = output_ptr->info()->strides_in_bytes().y();
+
+    // UV's width and height are subsampled
+    Window win_uv(win);
+    win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
+    win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
+    win_uv.validate();
+
+    Iterator in_y(input_ptr->plane(0), win);
+    Iterator in_u(input_ptr->plane(1), win_uv);
+    Iterator in_v(input_ptr->plane(2), win_uv);
+    Iterator out(output_ptr, win);
+
+    execute_window_loop(win, [&](const Coordinates & id)
+    {
+        const auto ta_y_top    = vld2q_u8(in_y.ptr());
+        const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
+        const auto ta_u        = vld1q_u8(in_u.ptr());
+        const auto ta_v        = vld1q_u8(in_v.ptr());
+        //ta_y.val[0] = Y0 Y2 Y4 Y6 ...
+        //ta_y.val[1] = Y1 Y3 Y5 Y7 ...
+        //ta_u.val[0] = U0 U2 U4 U6 ...
+        //ta_v.val[0] = V0 V2 V4 V6 ...
+
+        // Convert the uint8x16x4_t to float32x4x4_t
+        float32x4x4_t yvec_top, yyvec_top, yvec_bottom, yyvec_bottom, uvec, vvec;
+        convert_uint8x16_to_float32x4x4(ta_y_top.val[0], yvec_top);
+        convert_uint8x16_to_float32x4x4(ta_y_top.val[1], yyvec_top);
+        convert_uint8x16_to_float32x4x4(ta_y_bottom.val[0], yvec_bottom);
+        convert_uint8x16_to_float32x4x4(ta_y_bottom.val[1], yyvec_bottom);
+        convert_uint8x16_to_float32x4x4(ta_u, uvec);
+        convert_uint8x16_to_float32x4x4(ta_v, vvec);
+
+        yuyv_to_rgb_calculation(yvec_top.val[0], uvec.val[0], yyvec_top.val[0], vvec.val[0], out.ptr() + 0 * element_size, alpha);
+        yuyv_to_rgb_calculation(yvec_top.val[1], uvec.val[1], yyvec_top.val[1], vvec.val[1], out.ptr() + 1 * element_size, alpha);
+        yuyv_to_rgb_calculation(yvec_top.val[2], uvec.val[2], yyvec_top.val[2], vvec.val[2], out.ptr() + 2 * element_size, alpha);
+        yuyv_to_rgb_calculation(yvec_top.val[3], uvec.val[3], yyvec_top.val[3], vvec.val[3], out.ptr() + 3 * element_size, alpha);
+
+        yuyv_to_rgb_calculation(yvec_bottom.val[0], uvec.val[0], yyvec_bottom.val[0], vvec.val[0], out.ptr() + out_stride + 0 * element_size, alpha);
+        yuyv_to_rgb_calculation(yvec_bottom.val[1], uvec.val[1], yyvec_bottom.val[1], vvec.val[1], out.ptr() + out_stride + 1 * element_size, alpha);
+        yuyv_to_rgb_calculation(yvec_bottom.val[2], uvec.val[2], yyvec_bottom.val[2], vvec.val[2], out.ptr() + out_stride + 2 * element_size, alpha);
+        yuyv_to_rgb_calculation(yvec_bottom.val[3], uvec.val[3], yyvec_bottom.val[3], vvec.val[3], out.ptr() + out_stride + 3 * element_size, alpha);
+    },
+    in_y, in_u, in_v, out);
+}
+
+template <bool yuyv>
+void colorconvert_yuyv_to_nv12(const void *__restrict input, void *__restrict output, const Window &win)
+{
+    ARM_COMPUTE_ERROR_ON(nullptr == input);
+    ARM_COMPUTE_ERROR_ON(nullptr == output);
+    win.validate();
+
+    const auto input_ptr  = static_cast<const IImage *__restrict>(input);
+    const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
+
+    constexpr auto shift = yuyv ? 0 : 1;
+
+    // NV12's UV's width and height are subsampled
+    Window win_uv(win);
+    win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
+    win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
+    win_uv.validate();
+
+    Iterator in(input_ptr, win);
+    Iterator out_y(output_ptr->plane(0), win);
+    Iterator out_uv(output_ptr->plane(1), win_uv);
+
+    execute_window_loop(win, [&](const Coordinates & id)
+    {
+        const auto ta_top    = vld4q_u8(in.ptr());
+        const auto ta_bottom = vld4q_u8(in.ptr() + input_ptr->info()->strides_in_bytes().y());
+        //ta.val[0] = Y0 Y2 Y4 Y6 ...
+        //ta.val[1] = U0 U2 U4 U6 ...
+        //ta.val[2] = Y1 Y3 Y5 Y7 ...
+        //ta.val[3] = V0 V2 V4 V7 ...
+
+        uint8x16x2_t yvec;
+        yvec.val[0] = ta_top.val[0 + shift];
+        yvec.val[1] = ta_top.val[2 + shift];
+        vst2q_u8(out_y.ptr(), yvec);
+
+        uint8x16x2_t yyvec;
+        yyvec.val[0] = ta_bottom.val[0 + shift];
+        yyvec.val[1] = ta_bottom.val[2 + shift];
+        vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), yyvec);
+
+        uint8x16x2_t uvvec;
+        uvvec.val[0] = vhaddq_u8(ta_top.val[1 - shift], ta_bottom.val[1 - shift]);
+        uvvec.val[1] = vhaddq_u8(ta_top.val[3 - shift], ta_bottom.val[3 - shift]);
+        vst2q_u8(out_uv.ptr(), uvvec);
+    },
+    in, out_y, out_uv);
+}
+
+void colorconvert_iyuv_to_nv12(const void *__restrict input, void *__restrict output, const Window &win)
+{
+    ARM_COMPUTE_ERROR_ON(nullptr == input);
+    ARM_COMPUTE_ERROR_ON(nullptr == output);
+    win.validate();
+
+    const auto input_ptr  = static_cast<const IMultiImage *__restrict>(input);
+    const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
+
+    // UV's width and height are subsampled
+    Window win_uv(win);
+    win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
+    win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
+    win_uv.validate();
+
+    Iterator in_y(input_ptr->plane(0), win);
+    Iterator in_u(input_ptr->plane(1), win_uv);
+    Iterator in_v(input_ptr->plane(2), win_uv);
+    Iterator out_y(output_ptr->plane(0), win);
+    Iterator out_uv(output_ptr->plane(1), win_uv);
+
+    execute_window_loop(win, [&](const Coordinates & id)
+    {
+        const auto ta_y_top    = vld2q_u8(in_y.ptr());
+        const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
+        uint8x16x2_t ta_uv;
+        ta_uv.val[0] = vld1q_u8(in_u.ptr());
+        ta_uv.val[1] = vld1q_u8(in_v.ptr());
+        //ta_y.val[0] = Y0 Y2 Y4 Y6 ...
+        //ta_y.val[1] = Y1 Y3 Y5 Y7 ...
+        //ta_uv.val[0] = U0 U2 U4 U6 ...
+        //ta_uv.val[1] = V0 V2 V4 V6 ...
+
+        vst2q_u8(out_y.ptr(), ta_y_top);
+        vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom);
+        vst2q_u8(out_uv.ptr(), ta_uv);
+    },
+    in_y, in_u, in_v, out_y, out_uv);
+}
+
+template <bool uv>
+void colorconvert_nv12_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win)
+{
+    ARM_COMPUTE_ERROR_ON(nullptr == input);
+    ARM_COMPUTE_ERROR_ON(nullptr == output);
+    win.validate();
+
+    const auto input_ptr  = static_cast<const IMultiImage *__restrict>(input);
+    const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
+
+    constexpr auto shift = uv ? 0 : 1;
+
+    // UV's width and height are subsampled
+    Window win_uv(win);
+    win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
+    win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
+    win_uv.validate();
+
+    Iterator in_y(input_ptr->plane(0), win);
+    Iterator in_uv(input_ptr->plane(1), win_uv);
+    Iterator out_y(output_ptr->plane(0), win);
+    Iterator out_u(output_ptr->plane(1), win_uv);
+    Iterator out_v(output_ptr->plane(2), win_uv);
+
+    execute_window_loop(win, [&](const Coordinates & id)
+    {
+        const auto ta_y_top    = vld2q_u8(in_y.ptr());
+        const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
+        const auto ta_uv       = vld2q_u8(in_uv.ptr());
+        //ta_y.val[0] = Y0 Y2 Y4 Y6 ...
+        //ta_y.val[1] = Y1 Y3 Y5 Y7 ...
+        //ta_uv.val[0] = U0 U2 U4 U6 ...
+        //ta_uv.val[1] = V0 V2 V4 V6 ...
+
+        vst2q_u8(out_y.ptr(), ta_y_top);
+        vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom);
+        vst1q_u8(out_u.ptr(), ta_uv.val[0 + shift]);
+        vst1q_u8(out_v.ptr(), ta_uv.val[1 - shift]);
+    },
+    in_y, in_uv, out_y, out_u, out_v);
+}
+
+template <bool yuyv>
+void colorconvert_yuyv_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win)
+{
+    ARM_COMPUTE_ERROR_ON(nullptr == input);
+    ARM_COMPUTE_ERROR_ON(nullptr == output);
+    win.validate();
+
+    const auto input_ptr  = static_cast<const IImage *__restrict>(input);
+    const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
+
+    constexpr auto shift = yuyv ? 0 : 1;
+
+    // Destination's UV's width and height are subsampled
+    Window win_uv(win);
+    win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
+    win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
+    win_uv.validate();
+
+    Iterator in(input_ptr, win);
+    Iterator out_y(output_ptr->plane(0), win);
+    Iterator out_u(output_ptr->plane(1), win_uv);
+    Iterator out_v(output_ptr->plane(2), win_uv);
+
+    execute_window_loop(win, [&](const Coordinates & id)
+    {
+        const auto ta_top    = vld4q_u8(in.ptr());
+        const auto ta_bottom = vld4q_u8(in.ptr() + input_ptr->info()->strides_in_bytes().y());
+        //ta.val[0] = Y0 Y2 Y4 Y6 ...
+        //ta.val[1] = U0 U2 U4 U6 ...
+        //ta.val[2] = Y1 Y3 Y5 Y7 ...
+        //ta.val[3] = V0 V2 V4 V7 ...
+
+        uint8x16x2_t yvec;
+        yvec.val[0] = ta_top.val[0 + shift];
+        yvec.val[1] = ta_top.val[2 + shift];
+        vst2q_u8(out_y.ptr(), yvec);
+
+        uint8x16x2_t yyvec;
+        yyvec.val[0] = ta_bottom.val[0 + shift];
+        yyvec.val[1] = ta_bottom.val[2 + shift];
+        vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), yyvec);
+
+        uint8x16_t uvec;
+        uvec = vhaddq_u8(ta_top.val[1 - shift], ta_bottom.val[1 - shift]);
+        vst1q_u8(out_u.ptr(), uvec);
+
+        uint8x16_t vvec;
+        vvec = vhaddq_u8(ta_top.val[3 - shift], ta_bottom.val[3 - shift]);
+        vst1q_u8(out_v.ptr(), vvec);
+    },
+    in, out_y, out_u, out_v);
+}
+
+template <bool uv>
+void colorconvert_nv12_to_yuv4(const void *__restrict input, void *__restrict output, const Window &win)
+{
+    ARM_COMPUTE_ERROR_ON(nullptr == input);
+    ARM_COMPUTE_ERROR_ON(nullptr == output);
+    win.validate();
+
+    const auto input_ptr  = static_cast<const IMultiImage *__restrict>(input);
+    const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
+
+    constexpr auto shift = uv ? 0 : 1;
+
+    // UV's width and height are subsampled
+    Window win_uv(win);
+    win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
+    win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
+    win_uv.validate();
+
+    Iterator in_y(input_ptr->plane(0), win);
+    Iterator in_uv(input_ptr->plane(1), win_uv);
+    Iterator out_y(output_ptr->plane(0), win);
+    Iterator out_u(output_ptr->plane(1), win);
+    Iterator out_v(output_ptr->plane(2), win);
+
+    execute_window_loop(win, [&](const Coordinates & id)
+    {
+        const auto ta_y_top    = vld2q_u8(in_y.ptr());
+        const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
+        const auto ta_uv       = vld2q_u8(in_uv.ptr());
+        //ta_y.val[0] = Y0 Y2 Y4 Y6 ...
+        //ta_y.val[1] = Y1 Y3 Y5 Y7 ...
+        //ta_uv.val[0] = U0 U2 U4 U6 ...
+        //ta_uv.val[1] = V0 V2 V4 V6 ...
+
+        vst2q_u8(out_y.ptr(), ta_y_top);
+        vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom);
+
+        uint8x16x2_t uvec;
+        uvec.val[0] = ta_uv.val[0 + shift];
+        uvec.val[1] = ta_uv.val[0 + shift];
+        vst2q_u8(out_u.ptr(), uvec);
+        vst2q_u8(out_u.ptr() + output_ptr->plane(1)->info()->strides_in_bytes().y(), uvec);
+
+        uint8x16x2_t vvec;
+        vvec.val[0] = ta_uv.val[1 - shift];
+        vvec.val[1] = ta_uv.val[1 - shift];
+        vst2q_u8(out_v.ptr(), vvec);
+        vst2q_u8(out_v.ptr() + output_ptr->plane(2)->info()->strides_in_bytes().y(), vvec);
+    },
+    in_y, in_uv, out_y, out_u, out_v);
+}
+
+void colorconvert_iyuv_to_yuv4(const void *__restrict input, void *__restrict output, const Window &win)
+{
+    ARM_COMPUTE_ERROR_ON(nullptr == input);
+    ARM_COMPUTE_ERROR_ON(nullptr == output);
+    win.validate();
+
+    const auto input_ptr  = static_cast<const IMultiImage *__restrict>(input);
+    const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
+
+    // UV's width and height are subsampled
+    Window win_uv(win);
+    win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
+    win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
+    win_uv.validate();
+
+    Iterator in_y(input_ptr->plane(0), win);
+    Iterator in_u(input_ptr->plane(1), win_uv);
+    Iterator in_v(input_ptr->plane(2), win_uv);
+    Iterator out_y(output_ptr->plane(0), win);
+    Iterator out_u(output_ptr->plane(1), win);
+    Iterator out_v(output_ptr->plane(2), win);
+
+    execute_window_loop(win, [&](const Coordinates & id)
+    {
+        const auto ta_y_top    = vld2q_u8(in_y.ptr());
+        const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
+        const auto ta_u        = vld1q_u8(in_u.ptr());
+        const auto ta_v        = vld1q_u8(in_v.ptr());
+        //ta_y.val[0] = Y0 Y2 Y4 Y6 ...
+        //ta_y.val[1] = Y1 Y3 Y5 Y7 ...
+        //ta_u = U0 U2 U4 U6 ...
+        //ta_v = V0 V2 V4 V6 ...
+
+        vst2q_u8(out_y.ptr(), ta_y_top);
+        vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom);
+
+        uint8x16x2_t uvec;
+        uvec.val[0] = ta_u;
+        uvec.val[1] = ta_u;
+        vst2q_u8(out_u.ptr(), uvec);
+        vst2q_u8(out_u.ptr() + output_ptr->plane(1)->info()->strides_in_bytes().y(), uvec);
+
+        uint8x16x2_t vvec;
+        vvec.val[0] = ta_v;
+        vvec.val[1] = ta_v;
+        vst2q_u8(out_v.ptr(), vvec);
+        vst2q_u8(out_v.ptr() + output_ptr->plane(2)->info()->strides_in_bytes().y(), vvec);
+    },
+    in_y, in_u, in_v, out_y, out_u, out_v);
+}
+
+template <bool alpha>
+void colorconvert_rgb_to_nv12(const void *__restrict input, void *__restrict output, const Window &win)
+{
+    ARM_COMPUTE_ERROR_ON(nullptr == input);
+    ARM_COMPUTE_ERROR_ON(nullptr == output);
+    win.validate();
+
+    const auto input_ptr  = static_cast<const IImage *__restrict>(input);
+    const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
+
+    // UV's width and height are subsampled
+    Window win_uv(win);
+    win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
+    win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
+    win_uv.validate();
+
+    Iterator in(input_ptr, win);
+    Iterator out_y(output_ptr->plane(0), win);
+    Iterator out_uv(output_ptr->plane(1), win_uv);
+
+    execute_window_loop(win, [&](const Coordinates & id)
+    {
+        const auto ta_rgb_top    = load_rgb(in.ptr(), alpha);
+        const auto ta_rgb_bottom = load_rgb(in.ptr() + input_ptr->info()->strides_in_bytes().y(), alpha);
+        //ta_rgb.val[0] = R0 R1 R2 R3 ...
+        //ta_rgb.val[1] = G0 G1 G2 G3 ...
+        //ta_rgb.val[2] = B0 B1 B2 B3 ...
+
+        store_rgb_to_nv12(ta_rgb_top.val[0], ta_rgb_top.val[1], ta_rgb_top.val[2],
+                          ta_rgb_bottom.val[0], ta_rgb_bottom.val[1], ta_rgb_bottom.val[2],
+                          out_y.ptr(), out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(),
+                          out_uv.ptr());
+    },
+    in, out_y, out_uv);
+}
+
+template <bool alpha>
+void colorconvert_rgb_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win)
+{
+    ARM_COMPUTE_ERROR_ON(nullptr == input);
+    ARM_COMPUTE_ERROR_ON(nullptr == output);
+    win.validate();
+
+    const auto input_ptr  = static_cast<const IImage *__restrict>(input);
+    const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
+
+    // UV's width and height are subsampled
+    Window win_uv(win);
+    win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
+    win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
+    win_uv.validate();
+
+    Iterator in(input_ptr, win);
+    Iterator out_y(output_ptr->plane(0), win);
+    Iterator out_u(output_ptr->plane(1), win_uv);
+    Iterator out_v(output_ptr->plane(2), win_uv);
+
+    execute_window_loop(win, [&](const Coordinates & id)
+    {
+        const auto ta_rgb_top    = load_rgb(in.ptr(), alpha);
+        const auto ta_rgb_bottom = load_rgb(in.ptr() + input_ptr->info()->strides_in_bytes().y(), alpha);
+        //ta_rgb.val[0] = R0 R1 R2 R3 ...
+        //ta_rgb.val[1] = G0 G1 G2 G3 ...
+        //ta_rgb.val[2] = B0 B1 B2 B3 ...
+
+        store_rgb_to_iyuv(ta_rgb_top.val[0], ta_rgb_top.val[1], ta_rgb_top.val[2],
+                          ta_rgb_bottom.val[0], ta_rgb_bottom.val[1], ta_rgb_bottom.val[2],
+                          out_y.ptr(), out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(),
+                          out_u.ptr(), out_v.ptr());
+    },
+    in, out_y, out_u, out_v);
+}
+
+template <bool alpha>
+void colorconvert_rgb_to_yuv4(const void *__restrict input, void *__restrict output, const Window &win)
+{
+    ARM_COMPUTE_ERROR_ON(nullptr == input);
+    ARM_COMPUTE_ERROR_ON(nullptr == output);
+    win.validate();
+
+    const auto input_ptr  = static_cast<const IImage *__restrict>(input);
+    const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
+
+    Iterator in(input_ptr, win);
+    Iterator out_y(output_ptr->plane(0), win);
+    Iterator out_u(output_ptr->plane(1), win);
+    Iterator out_v(output_ptr->plane(2), win);
+
+    execute_window_loop(win, [&](const Coordinates & id)
+    {
+        const auto ta_rgb = load_rgb(in.ptr(), alpha);
+        //ta_rgb.val[0] = R0 R1 R2 R3 ...
+        //ta_rgb.val[1] = G0 G1 G2 G3 ...
+        //ta_rgb.val[2] = B0 B1 B2 B3 ...
+
+        store_rgb_to_yuv4(ta_rgb.val[0], ta_rgb.val[1], ta_rgb.val[2],
+                          out_y.ptr(), out_u.ptr(), out_v.ptr());
+    },
+    in, out_y, out_u, out_v);
+}
+}
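Editorial aside (not part of the patch): in the 4:2:0 outputs above (NV12/IYUV), each U/V sample covers a 2x2 block of pixels, which is why the UV windows step at half rate and why the stores combine vrhaddq_u8 (rounding halving add) and vhadd_u8 (halving add). Per 2x2 block the chroma that gets written is roughly:

    // u00, u01 = top-row chroma; u10, u11 = bottom-row chroma
    uint8_t u = (((u00 + u01 + 1) / 2) + ((u10 + u11 + 1) / 2)) / 2; // ~mean of the block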
diff --git a/arm_compute/core/NEON/NEFixedPoint.h b/arm_compute/core/NEON/NEFixedPoint.h
new file mode 100644
index 0000000000..fb712611cb
--- /dev/null
+++ b/arm_compute/core/NEON/NEFixedPoint.h
@@ -0,0 +1,686 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEFIXEDPOINT_H__
+#define __ARM_COMPUTE_NEFIXEDPOINT_H__
+
+#include "arm_compute/core/FixedPoint.h"
+
+#include <arm_neon.h>
+
+namespace arm_compute
+{
+using qint8x8_t = int8x8_t;       /**< 8 bit fixed point vector with 8 elements */
+using qint8x8x2_t = int8x8x2_t;   /**< 8 bit fixed point vector with 16 elements */
+using qint8x8x3_t = int8x8x3_t;   /**< 8 bit fixed point vector with 24 elements */
+using qint8x8x4_t = int8x8x4_t;   /**< 8 bit fixed point vector with 32 elements */
+using qint8x16_t = int8x16_t;     /**< 8 bit fixed point vector with 16 elements */
+using qint8x16x2_t = int8x16x2_t; /**< 8 bit fixed point vector with 32 elements */
+using qint8x16x3_t = int8x16x3_t; /**< 8 bit fixed point vector with 48 elements */
+using qint8x16x4_t = int8x16x4_t; /**< 8 bit fixed point vector with 64 elements */
+using qint16x4_t = int16x4_t;     /**< 16 bit fixed point vector with 4 elements */
+using qint16x4x2_t = int16x4x2_t; /**< 16 bit fixed point vector with 8 elements */
+using qint16x4x3_t = int16x4x3_t; /**< 16 bit fixed point vector with 12 elements */
+using qint16x4x4_t = int16x4x4_t; /**< 16 bit fixed point vector with 16 elements */
+using qint16x8_t = int16x8_t;     /**< 16 bit fixed point vector with 8 elements */
+using qint16x8x2_t = int16x8x2_t; /**< 16 bit fixed point vector with 16 elements */
+using qint16x8x3_t = int16x8x3_t; /**< 16 bit fixed point vector with 24 elements */
+using qint16x8x4_t = int16x8x4_t; /**< 16 bit fixed point vector with 32 elements */
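Editorial aside (not part of the patch): the convention behind these aliases is that a QS8/QS16 value stores real_value * 2^fixed_point_position in a signed integer. With fixed_point_position = 5, for example:

    const qint8_t raw  = static_cast<qint8_t>(1.5f * (1 << 5)); // 48
    const float   back = static_cast<float>(raw) / (1 << 5);    // 1.5f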
+
+/** Get the lower half of a 16 elements vector
+ *
+ * @param[in] a vector of 16 elements
+ *
+ * @return 8 bit fixed point vector (8 elements)
+ */
+qint8x8_t vget_low_qs8(qint8x16_t a);
+
+/** Get the higher half of a 16 elements vector
+ *
+ * @param[in] a vector of 16 elements
+ *
+ * @return 8 bit fixed point vector (8 elements)
+ */
+qint8x8_t vget_high_qs8(qint8x16_t a);
+
+/** Load a single 8 bit fixed point vector from memory (8 elements)
+ *
+ * @param[in] addr Memory address of the 8 bit fixed point vector to load
+ *
+ * @return 8 bit fixed point vector (8 elements)
+ */
+qint8x8_t vld1_qs8(const qint8_t *addr);
+
+/** Load a single 8 bit fixed point vector from memory (16 elements)
+ *
+ * @param[in] addr Memory address of the 8 bit fixed point vector to load
+ *
+ * @return 8 bit fixed point vector (16 elements)
+ */
+qint8x16_t vld1q_qs8(const qint8_t *addr);
+
+/** Load a single 16 bit fixed point vector from memory (4 elements)
+ *
+ * @param[in] addr Memory address of the 16 bit fixed point vector to load
+ *
+ * @return 16 bit fixed point vector (4 elements)
+ */
+qint16x4_t vld1_qs16(const qint16_t *addr);
+
+/** Load a single 16 bit fixed point vector from memory (8 elements)
+ *
+ * @param[in] addr Memory address of the 16 bit fixed point vector to load
+ *
+ * @return 16 bit fixed point vector (8 elements)
+ */
+qint16x8_t vld1q_qs16(const qint16_t *addr);
+
+/** Load all lanes of 8 bit fixed point vector with same value from memory (8 elements)
+ *
+ * @param[in] addr Memory address of the 8 bit fixed point scalar value to load
+ *
+ * @return 8 bit fixed point vector (8 elements)
+ */
+qint8x8_t vld1_dup_qs8(const qint8_t *addr);
+
+/** Load all lanes of 8 bit fixed point vector with same value from memory (16 elements)
+ *
+ * @param[in] addr Memory address of the 8 bit fixed point scalar value to load
+ *
+ * @return 8 bit fixed point vector (16 elements)
+ */
+qint8x16_t vld1q_dup_qs8(const qint8_t *addr);
+
+/** Store a single 8 bit fixed point vector to memory (8 elements)
+ *
+ * @param[in] addr Memory address where the 8 bit fixed point vector should be stored
+ * @param[in] b    8 bit fixed point vector to store
+ *
+ */
+void vst1_qs8(qint8_t *addr, qint8x8_t b);
+
+/** Store a single 8 bit fixed point vector to memory (16 elements)
+ *
+ * @param[in] addr Memory address where the 8 bit fixed point vector should be stored
+ * @param[in] b    8 bit fixed point vector to store
+ *
+ */
+void vst1q_qs8(qint8_t *addr, qint8x16_t b);
+
+/** Store a single 16 bit fixed point vector to memory (4 elements)
+ *
+ * @param[in] addr Memory address where the 16 bit fixed point vector should be stored
+ * @param[in] b    16 bit fixed point vector to store
+ *
+ */
+void vst1_qs16(qint16_t *addr, qint16x4_t b);
+
+/** Store a single 16 bit fixed point vector to memory (8 elements)
+ *
+ * @param[in] addr Memory address where the 16 bit fixed point vector should be stored
+ * @param[in] b    16 bit fixed point vector to store
+ *
+ */
+void vst1q_qs16(qint16_t *addr, qint16x8_t b);
+
+/** 16 bit fixed point vector saturating narrow (8 elements)
+ *
+ * @param[in] a 16 bit fixed point vector to convert
+ *
+ * @return 8 bit fixed point vector
+ */
+qint8x8_t vqmovn_qs16(qint16x8_t a);
+
+/** 8 bit fixed point vector duplicate (8 elements)
+ *
+ * @param[in] a 8 bit fixed point to duplicate
+ *
+ * @return The result of the vector duplication
+ */
+qint8x8_t vdup_n_qs8(qint8_t a);
+
+/** 8 bit fixed point vector duplicate (16 elements)
+ *
+ * @param[in] a 8 bit fixed point to duplicate
+ *
+ * @return The result of the vector duplication
+ */
+qint8x16_t vdupq_n_qs8(qint8_t a);
+
+/** Duplicate a float and convert it to 8 bit fixed point vector (16 elements)
+ *
+ * @param[in] a                    Float to duplicate
+ * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
+ *
+ * @return The result of the vector duplication
+ */
+qint8x16_t vdupq_n_qs8_f32(float a, int fixed_point_position);
+
+/** 16 bit fixed point vector duplicate (8 elements)
+ *
+ * @param[in] a 16 bit fixed point to duplicate
+ *
+ * @return The result of the vector duplication
+ */
+qint16x8_t vdupq_n_qs16(qint16_t a);
+
+/** Absolute value of 8 bit fixed point vector (8 elements)
+ *
+ * @param[in] a 8 bit fixed point input vector
+ *
+ * @return The result of the 8 bit fixed point vector absolute value
+ */
+qint8x8_t vabs_qs8(qint8x8_t a);
+
+/** Absolute value of 8 bit fixed point vector (16 elements)
+ *
+ * @param[in] a 8 bit fixed point input vector
+ *
+ * @return The result of the 8 bit fixed point vector absolute value
+ */
+qint8x16_t vabsq_qs8(qint8x16_t a);
+
+/** Saturating absolute value of 8 bit fixed point vector (8 elements)
+ *
+ * @param[in] a 8 bit fixed point input vector
+ *
+ * @return The result of the 8 bit fixed point vector absolute value
+ */
+qint8x8_t vqabs_qs8(qint8x8_t a);
+
+/** Saturating absolute value of 8 bit fixed point vector (16 elements)
+ *
+ * @param[in] a 8 bit fixed point input vector
+ *
+ * @return The result of the 8 bit fixed point vector absolute value
+ */
+qint8x16_t vqabsq_qs8(qint8x16_t a);
+
+/** 8 bit fixed point vector max (8 elements)
+ *
+ * @param[in] a First 8 bit fixed point input vector
+ * @param[in] b Second 8 bit fixed point input vector
+ *
+ * @return The result of the 8 bit fixed point vector max operation
+ */
+qint8x8_t vmax_qs8(qint8x8_t a, qint8x8_t b);
+
+/** 8 bit fixed point vector max (16 elements)
+ *
+ * @param[in] a First 8 bit fixed point input vector
+ * @param[in] b Second 8 bit fixed point input vector
+ *
+ * @return The result of the 8 bit fixed point vector max operation
+ */
+qint8x16_t vmaxq_qs8(qint8x16_t a, qint8x16_t b);
+
+/** 8 bit fixed point vector pairwise max (8 elements)
+ *
+ * @param[in] a First 8 bit fixed point input vector
+ * @param[in] b Second 8 bit fixed point input vector
+ *
+ * @return The result of the 8 bit fixed point vector pairwise max operation
+ */
+qint8x8_t vpmax_qs8(qint8x8_t a, qint8x8_t b);
+
+/** 8 bit fixed point vector min (8 elements)
+ *
+ * @param[in] a First 8 bit fixed point input vector
+ * @param[in] b Second 8 bit fixed point input vector
+ *
+ * @return The result of the 8 bit fixed point vector min operation
+ */
+qint8x8_t vmin_qs8(qint8x8_t a, qint8x8_t b);
+
+/** 8 bit fixed point vector min (16 elements)
+ *
+ * @param[in] a First 8 bit fixed point input vector
+ * @param[in] b Second 8 bit fixed point input vector
+ *
+ * @return The result of the 8 bit fixed point vector min operation
+ */
+qint8x16_t vminq_qs8(qint8x16_t a, qint8x16_t b);
+
+/** 8 bit fixed point vector pairwise min (8 elements)
+ *
+ * @param[in] a First 8 bit fixed point input vector
+ * @param[in] b Second 8 bit fixed point input vector
+ *
+ * @return The result of the 8 bit fixed point vector pairwise min operation
+ */
+qint8x8_t vpmin_qs8(qint8x8_t a, qint8x8_t b);
+
+/** 8 bit fixed point vector add (8 elements)
+ *
+ * @param[in] a First 8 bit fixed point input vector
+ * @param[in] b Second 8 bit fixed point input vector
+ *
+ * @return The result of the 8 bit fixed point vector addition
+ */
+qint8x8_t vadd_qs8(qint8x8_t a, qint8x8_t b);
+
+/** 8 bit fixed point vector add (16 elements)
+ *
+ * @param[in] a First 8 bit fixed point input vector
+ * @param[in] b Second 8 bit fixed point input vector
+ *
+ * @return The result of the 8 bit fixed point vector addition
+ */
+qint8x16_t vaddq_qs8(qint8x16_t a, qint8x16_t b);
+
+/** 8 bit fixed point vector saturating add (8 elements)
+ *
+ * @param[in] a First 8 bit fixed point input vector
+ * @param[in] b Second 8 bit fixed point input vector
+ *
+ * @return The result of the 8 bit fixed point vector addition. The result is saturated in case of overflow
+ */
+qint8x8_t vqadd_qs8(qint8x8_t a, qint8x8_t b);
+
+/** 8 bit fixed point vector saturating add (16 elements)
+ *
+ * @param[in] a First 8 bit fixed point input vector
+ * @param[in] b Second 8 bit fixed point input vector
+ *
+ * @return The result of the 8 bit fixed point vector addition. The result is saturated in case of overflow
+ */
+qint8x16_t vqaddq_qs8(qint8x16_t a, qint8x16_t b);
+
+/** 16 bit fixed point vector saturating add (4 elements)
+ *
+ * @param[in] a First 16 bit fixed point input vector
+ * @param[in] b Second 16 bit fixed point input vector
+ *
+ * @return The result of the 16 bit fixed point vector addition. The result is saturated in case of overflow
+ */
+qint16x4_t vqadd_qs16(qint16x4_t a, qint16x4_t b);
+/** 16 bit fixed point vector saturating add (4 elements) + * + * @param[in] a First 16 bit fixed point input vector + * @param[in] b Second 16 bit fixed point input vector + * + * @return The result of the 16 bit fixed point vector addition. The result is saturated in case of overflow + */ +qint16x4_t vqadd_qs16(qint16x4_t a, qint16x4_t b); + +/** 16 bit fixed point vector saturating add (8 elements) + * + * @param[in] a First 16 bit fixed point input vector + * @param[in] b Second 16 bit fixed point input vector + * + * @return The result of the 16 bit fixed point vector addition. The result is saturated in case of overflow + */ +qint16x8_t vqaddq_qs16(qint16x8_t a, qint16x8_t b); + +/** 8 bit fixed point vector pairwise add long (8 elements) + * + * @param[in] a 8 bit fixed point input vector + * + * @return The 16 bit result of the pairwise addition of adjacent 8 bit elements + */ +int16x4_t vpaddl_qs8(qint8x8_t a); + +/** 8 bit fixed point vector subtraction (8 elements) + * + * @param[in] a First 8 bit fixed point input vector + * @param[in] b Second 8 bit fixed point input vector + * + * @return The result of the 8 bit fixed point vector subtraction + */ +qint8x8_t vsub_qs8(qint8x8_t a, qint8x8_t b); + +/** 8 bit fixed point vector subtraction (16 elements) + * + * @param[in] a First 8 bit fixed point input vector + * @param[in] b Second 8 bit fixed point input vector + * + * @return The result of the 8 bit fixed point vector subtraction + */ +qint8x16_t vsubq_qs8(qint8x16_t a, qint8x16_t b); + +/** 8 bit fixed point vector saturating subtraction (8 elements) + * + * @param[in] a First 8 bit fixed point input vector + * @param[in] b Second 8 bit fixed point input vector + * + * @return The result of the 8 bit fixed point vector subtraction. The result is saturated in case of overflow + */ +qint8x8_t vqsub_qs8(qint8x8_t a, qint8x8_t b); + +/** 8 bit fixed point vector saturating subtraction (16 elements) + * + * @param[in] a First 8 bit fixed point input vector + * @param[in] b Second 8 bit fixed point input vector + * + * @return The result of the 8 bit fixed point vector subtraction. The result is saturated in case of overflow + */ +qint8x16_t vqsubq_qs8(qint8x16_t a, qint8x16_t b); + +/** 8 bit fixed point vector multiply (8 elements) + * + * @param[in] a First 8 bit fixed point input vector + * @param[in] b Second 8 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8 bit fixed point vector multiplication. + */ +qint8x8_t vmul_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position); + +/** 8 bit fixed point vector multiply (16 elements) + * + * @param[in] a First 8 bit fixed point input vector + * @param[in] b Second 8 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8 bit fixed point vector multiplication. + */ +qint8x16_t vmulq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position);
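+/** The multiply family computes (a * b) >> fixed_point_position with rounding, i.e. the product of two Q values renormalised back into the same Q format. A scalar sketch of what one lane of vmul_qs8() does (assuming Q3.4; this mirrors the .inl implementation): + * + * @code + * const int fpp = 4; + * const qint8_t a = 24, b = 8; // 1.5 and 0.5 in Q3.4 + * int16_t acc = 1 << (fpp - 1); // rounding constant, as in the implementation + * acc += static_cast<int16_t>(a) * static_cast<int16_t>(b); // widening multiply-accumulate + * const qint8_t res = static_cast<qint8_t>(acc >> fpp); // res == 12, i.e. 0.75 in Q3.4 + * @endcode + */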
+/** 8 bit fixed point vector saturating multiply (8 elements) + * + * @param[in] a First 8 bit fixed point input vector + * @param[in] b Second 8 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8 bit fixed point vector multiplication. The result is saturated in case of overflow + */ +qint8x8_t vqmul_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position); + +/** 8 bit fixed point vector saturating multiply (16 elements) + * + * @param[in] a First 8 bit fixed point input vector + * @param[in] b Second 8 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8 bit fixed point vector multiplication. The result is saturated in case of overflow + */ +qint8x16_t vqmulq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position); + +/** 8 bit fixed point vector long multiply (8 elements) + * + * @param[in] a First 8 bit fixed point input vector + * @param[in] b Second 8 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8 bit fixed point long vector multiplication. + */ +qint16x8_t vmull_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position); + +/** 8 bit fixed point vector multiply-accumulate (8 elements). This operation performs the product between @p b and @p c and adds the result to @p a (a + b * c). + * + * @param[in] a First 8 bit fixed point input vector where the result of multiplication must be added to + * @param[in] b Second 8 bit fixed point input vector + * @param[in] c Third 8 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8 bit fixed point vector multiply-accumulate + */ +qint8x8_t vmla_qs8(qint8x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position); + +/** 8 bit fixed point vector multiply-accumulate (16 elements). This operation performs the product between @p b and @p c and adds the result to @p a (a + b * c). + * + * @param[in] a First 8 bit fixed point input vector where the result of multiplication must be added to + * @param[in] b Second 8 bit fixed point input vector + * @param[in] c Third 8 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8 bit fixed point vector multiply-accumulate + */ +qint8x16_t vmlaq_qs8(qint8x16_t a, qint8x16_t b, qint8x16_t c, int fixed_point_position); + +/** 8 bit fixed point vector saturating multiply-accumulate (8 elements). This operation performs the product between @p b and @p c and adds the result to @p a (a + b * c). + * + * @param[in] a First 8 bit fixed point input vector where the result of multiplication must be added to + * @param[in] b Second 8 bit fixed point input vector + * @param[in] c Third 8 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8 bit fixed point vector multiply-accumulate. The result is saturated in case of overflow + */ +qint8x8_t vqmla_qs8(qint8x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position);
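+/** In the multiply-accumulate family only the product is renormalised; the accumulator @p a is already in the target Q format and is added as-is, i.e. res = a + ((b * c) >> fixed_point_position). A scalar sketch per lane (assuming Q3.4): + * + * @code + * const int fpp = 4; + * const qint8_t a = 16, b = 24, c = 8; // 1.0, 1.5 and 0.5 in Q3.4 + * const int16_t prod = (static_cast<int16_t>(b) * static_cast<int16_t>(c) + (1 << (fpp - 1))) >> fpp; // 12 + * const qint8_t res = static_cast<qint8_t>(a + prod); // 28, i.e. 1.75 in Q3.4 + * @endcode + */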
+/** 8 bit fixed point vector saturating multiply-accumulate (16 elements). This operation performs the product between @p b and @p c and adds the result to @p a (a + b * c). + * + * @param[in] a First 8 bit fixed point input vector where the result of multiplication must be added to + * @param[in] b Second 8 bit fixed point input vector + * @param[in] c Third 8 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8 bit fixed point vector multiply-accumulate. The result is saturated in case of overflow + */ +qint8x16_t vqmlaq_qs8(qint8x16_t a, qint8x16_t b, qint8x16_t c, int fixed_point_position); + +/** 8 bit fixed point vector multiply-accumulate long (8 elements). + * This operation performs the product between @p b and @p c and adds the result to the 16 bit fixed point vector @p a (a + b * c). + * + * @param[in] a First 16 bit fixed point input vector where the result of multiplication must be added to + * @param[in] b Second 8 bit fixed point input vector + * @param[in] c Third 8 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8 bit fixed point vector multiply-accumulate long + */ +qint16x8_t vmlal_qs8(qint16x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position); + +/** 8 bit fixed point vector saturating multiply-accumulate long (8 elements). The saturation is performed on the 16 bit fixed point output vector. + * This operation performs the product between @p b and @p c and adds the result to the 16 bit fixed point vector @p a (a + b * c). + * + * @param[in] a First 16 bit fixed point input vector where the result of multiplication must be added to + * @param[in] b Second 8 bit fixed point input vector + * @param[in] c Third 8 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8 bit fixed point vector multiply-accumulate long + */ +qint16x8_t vqmlal_qs8(qint16x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position); + +/** Convert a float vector with 4x2 elements to 8 bit fixed point vector with 8 elements + * + * @param[in] a Float input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the conversion float -> 8 bit fixed point + */ +qint8x8_t vcvt_qs8_f32(const float32x4x2_t &a, int fixed_point_position); + +/** Convert a float vector with 4x4 elements to 8 bit fixed point vector with 16 elements + * + * @param[in] a Float input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the conversion float -> 8 bit fixed point + */ +qint8x16_t vcvtq_qs8_f32(const float32x4x4_t &a, int fixed_point_position); + +/** Convert an 8 bit fixed point vector with 8 elements to a float vector with 4x2 elements + * + * @param[in] a 8 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the conversion 8 bit fixed point -> float32x4x2 + */ +float32x4x2_t vcvt_f32_qs8(qint8x8_t a, int fixed_point_position);
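+/** Conversion in either direction is a scale by 2^fixed_point_position; the float -> fixed point path also adds 0.5 for rounding and saturates (see the .inl implementation). Round-tripping through 8 bit fixed point therefore quantises to the nearest multiple of 1/2^fixed_point_position. A minimal sketch: + * + * @code + * const float32x4x2_t in = { { vdupq_n_f32(0.3f), vdupq_n_f32(0.3f) } }; + * const qint8x8_t q = vcvt_qs8_f32(in, 4); // each lane holds round(0.3f * 16) == 5 + * const float32x4x2_t out = vcvt_f32_qs8(q, 4); // each lane reads back 5 / 16.0f == 0.3125f + * @endcode + */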
+/** Convert an 8 bit fixed point vector with 16 elements to a float vector with 4x4 elements + * + * @param[in] a 8 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the conversion 8 bit fixed point -> float32x4x4 + */ +float32x4x4_t vcvtq_f32_qs8(qint8x16_t a, int fixed_point_position); + +/** Calculate reciprocal of a fixed point 8bit number using the Newton-Raphson method. (8 elements) + * + * @param[in] a 8bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8bit reciprocal (1/a). + */ +qint8x8_t vrecip_qs8(qint8x8_t a, int fixed_point_position); + +/** Calculate reciprocal of a fixed point 8bit number using the Newton-Raphson method. (16 elements) + * + * @param[in] a 8bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8bit reciprocal (1/a). + */ +qint8x16_t vrecipq_qs8(qint8x16_t a, int fixed_point_position); + +/** Division fixed point 8bit (8 elements) + * + * @param[in] a First 8bit fixed point input vector + * @param[in] b Second 8bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The quotient in 8bit fixed point format. + */ +qint8x8_t vdiv_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position); + +/** Division fixed point 8bit (16 elements) + * + * @param[in] a First 8bit fixed point input vector + * @param[in] b Second 8bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The quotient in 8bit fixed point format. + */ +qint8x16_t vdivq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position);
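+/** Division is implemented as multiplication by the Newton-Raphson reciprocal, so its accuracy is bounded by that of vrecip_qs8() / vrecipq_qs8(). Conceptually (this mirrors the actual .inl implementation): + * + * @code + * // a / b == a * (1 / b), evaluated entirely in the fixed point domain: + * const qint8x8_t quotient = vmul_qs8(a, vrecip_qs8(b, fixed_point_position), fixed_point_position); + * @endcode + */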
+/** Perform a 4th degree polynomial approximation. (8 elements) + * + * @param[in] a 8bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8bit Taylor approximation. + */ +template <bool islog> +qint8x8_t vtaylor_poly_qs8(qint8x8_t a, int fixed_point_position); + +/** Perform a 4th degree polynomial approximation. (16 elements) + * + * @param[in] a 8bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8bit Taylor approximation. + */ +template <bool islog> +qint8x16_t vtaylor_polyq_qs8(qint8x16_t a, int fixed_point_position); + +/** Calculate saturating exponential fixed point 8bit (8 elements) + * + * @param[in] a 8bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8bit saturating exponential + */ +qint8x8_t vqexp_qs8(qint8x8_t a, int fixed_point_position); + +/** Calculate saturating exponential fixed point 8bit (16 elements) + * + * @param[in] a 8bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8bit saturating exponential + */ +qint8x16_t vqexpq_qs8(qint8x16_t a, int fixed_point_position); + +/** Calculate logarithm fixed point 8bit (8 elements) + * + * @param[in] a 8bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8bit logarithm. + */ +qint8x8_t vlog_qs8(qint8x8_t a, int fixed_point_position); + +/** Calculate logarithm fixed point 8bit (16 elements) + * + * @param[in] a 8bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8bit logarithm. + */ +qint8x16_t vlogq_qs8(qint8x16_t a, int fixed_point_position); + +/** Calculate inverse square root for fixed point 8bit using Newton-Raphson method (8 elements) + * + * @param[in] a 8bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8bit inverse sqrt. + */ +qint8x8_t vinvsqrt_qs8(qint8x8_t a, int fixed_point_position); + +/** Calculate saturating inverse square root for fixed point 8bit using Newton-Raphson method (8 elements) + * + * @param[in] a 8bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8bit inverse sqrt. + */ +qint8x8_t vqinvsqrt_qs8(qint8x8_t a, int fixed_point_position); + +/** Calculate inverse square root for fixed point 8bit using Newton-Raphson method (16 elements) + * + * @param[in] a 8bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8bit inverse sqrt. + */ +qint8x16_t vinvsqrtq_qs8(qint8x16_t a, int fixed_point_position); + +/** Calculate saturating inverse square root for fixed point 8bit using Newton-Raphson method (16 elements) + * + * @param[in] a 8bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8bit inverse sqrt. + */ +qint8x16_t vqinvsqrtq_qs8(qint8x16_t a, int fixed_point_position);
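+/** The inverse square root functions iterate the Newton-Raphson recurrence x_{n+1} = x_n * (3 - a * x_n^2) / 2, which converges to 1/sqrt(a); at 8 bit precision three iterations suffice. One iteration in scalar form (an illustrative sketch only): + * + * @code + * float nr_invsqrt_step(float a, float x) { return x * (3.0f - a * x * x) * 0.5f; } + * @endcode + */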
+/** Calculate hyperbolic tangent for fixed point 8bit (8 elements) + * + * @param[in] a 8bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The calculated Hyperbolic Tangent. + */ +qint8x8_t vtanh_qs8(qint8x8_t a, int fixed_point_position); + +/** Calculate hyperbolic tangent for fixed point 8bit (16 elements) + * + * @param[in] a 8bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The calculated Hyperbolic Tangent. + */ +qint8x16_t vtanhq_qs8(qint8x16_t a, int fixed_point_position); + +/** Calculate saturating n power for fixed point 8bit (16 elements). + * + * pow(a,b) = e^(b*log(a)) + * + * @param[in] a 8bit fixed point input vector + * @param[in] b 8bit fixed point power vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8bit power. + */ +qint8x16_t vqpowq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position);
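+/** A usage sketch (assuming Q3.4, i.e. fixed_point_position = 4; results are approximate because the implementation goes through the exp/log identity above): + * + * @code + * const qint8x16_t base = vdupq_n_qs8(24); // 1.5 in Q3.4 + * const qint8x16_t n = vdupq_n_qs8(32); // 2.0 in Q3.4 + * const qint8x16_t p = vqpowq_qs8(base, n, 4); // ~2.25, i.e. raw values close to 36 + * @endcode + */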
+} +#include "arm_compute/core/NEON/NEFixedPoint.inl" +#endif /* __ARM_COMPUTE_NEFIXEDPOINT_H__ */ diff --git a/arm_compute/core/NEON/NEFixedPoint.inl b/arm_compute/core/NEON/NEFixedPoint.inl new file mode 100644 index 0000000000..6db344dc11 --- /dev/null +++ b/arm_compute/core/NEON/NEFixedPoint.inl @@ -0,0 +1,1018 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +namespace arm_compute +{ +/** Exponent polynomial coefficients for 8 bit fixed point (8 elements) + * Format is in Q0.7 for all elements */ +const std::array<qint8x8_t, 4> exp_tab_qs8 = +{ + { + vdup_n_s8(0x7F), // 0.9978546 + vdup_n_s8(0x3F), // 0.4994721 + vdup_n_s8(0x16), // 0.1763723 + vdup_n_s8(0x05), // 0.0435108 + } +}; + +/** Exponent polynomial coefficients for 8 bit fixed point (16 elements) + * Format is in Q0.7 for all elements */ +const std::array<qint8x16_t, 4> exp_tabq_qs8 = +{ + { + vdupq_n_s8(0x7F), // 0.9978546 + vdupq_n_s8(0x3F), // 0.4994721 + vdupq_n_s8(0x16), // 0.1763723 + vdupq_n_s8(0x05), // 0.0435108 + } +}; + +/** Logarithm polynomial coefficients for 8 bit fixed point (8 elements) + * Format is in Q0.7 for all elements except the first one which is in Q1.6 */ +const std::array<qint8x8_t, 4> log_tab_qs8 = +{ + { + vdup_n_s8(0x5C), // 1.4384189 + vdup_n_s8(-0x56), // -0.6771900 + vdup_n_s8(0x29), // 0.3218538 + vdup_n_s8(-0x0A), // -0.0832229 + } +}; + +/** Logarithm polynomial coefficients for 8 bit fixed point (16 elements) + * Format is in Q0.7 for all elements except the first one which is in Q1.6 */ +const std::array<qint8x16_t, 4> log_tabq_qs8 = +{ + { + vdupq_n_s8(0x5C), // 1.4384189 + vdupq_n_s8(-0x56), // -0.6771900 + vdupq_n_s8(0x29), // 0.3218538 + vdupq_n_s8(-0x0A), // -0.0832229 + } +}; + +inline qint8x8_t vget_low_qs8(qint8x16_t a) +{ + return vget_low_s8(a); +} + +inline qint8x8_t vget_high_qs8(qint8x16_t a) +{ + return vget_high_s8(a); +} + +inline qint8x8_t vld1_qs8(const qint8_t *addr) +{ + return vld1_s8(addr); +} + +inline qint8x16_t vld1q_qs8(const qint8_t *addr) +{ + return vld1q_s8(addr); +} + +inline qint16x4_t vld1_qs16(const qint16_t *addr) +{ + return vld1_s16(addr); +} + +inline qint16x8_t vld1q_qs16(const qint16_t *addr) +{ + return vld1q_s16(addr); +} + +inline qint8x8_t vld1_dup_qs8(const qint8_t *addr) +{ + return vld1_dup_s8(addr); +} + +inline qint8x16_t vld1q_dup_qs8(const qint8_t *addr) +{ + return vld1q_dup_s8(addr); +} + +inline void vst1_qs8(qint8_t *addr, qint8x8_t b) +{ + vst1_s8(addr, b); +} + +inline void vst1q_qs8(qint8_t *addr, qint8x16_t b) +{ + vst1q_s8(addr, b); +} + +inline void vst1_qs16(qint16_t *addr, qint16x4_t b) +{ + vst1_s16(addr, b); +} + +inline void vst1q_qs16(qint16_t *addr, qint16x8_t b) +{ + vst1q_s16(addr, b); +} + +inline qint8x8_t vqmovn_qs16(qint16x8_t a) +{ + return vqmovn_s16(a); +} + +inline qint8x8_t vdup_n_qs8(qint8_t a) +{ + return vdup_n_s8(a); +} + +inline qint8x16_t vdupq_n_qs8(qint8_t a) +{ + return vdupq_n_s8(a); +} + +inline qint8x16_t vdupq_n_qs8_f32(float a, int fixed_point_position) +{ + float32x4x4_t res = + { + { + vdupq_n_f32(a), + vdupq_n_f32(a), + vdupq_n_f32(a), + vdupq_n_f32(a), + } + }; + return vcvtq_qs8_f32(res, fixed_point_position); +} + +inline qint16x8_t vdupq_n_qs16(qint16_t a) +{ + return vdupq_n_s16(a); +} + +inline qint8x8_t vabs_qs8(qint8x8_t a) +{ + return vabs_s8(a); +} + +inline qint8x16_t vabsq_qs8(qint8x16_t a) +{ + return vabsq_s8(a); +} + +inline qint8x8_t vqabs_qs8(qint8x8_t a) +{ + return vqabs_s8(a); +} + +inline qint8x16_t vqabsq_qs8(qint8x16_t a) +{ + return vqabsq_s8(a); +} + +inline qint8x8_t vmax_qs8(qint8x8_t a, qint8x8_t b) +{ + return vmax_s8(a, b); +} + +inline qint8x16_t vmaxq_qs8(qint8x16_t a, qint8x16_t b) +{ + return vmaxq_s8(a, b); +} + +inline qint8x8_t vpmax_qs8(qint8x8_t a, qint8x8_t b) +{ + return vpmax_s8(a, b); +} + +inline qint8x8_t vmin_qs8(qint8x8_t a, qint8x8_t b) +{ + return vmin_s8(a, b); +} + +inline qint8x16_t vminq_qs8(qint8x16_t a, qint8x16_t b) +{ + return vminq_s8(a,
b); +} + +inline qint8x8_t vpmin_qs8(qint8x8_t a, qint8x8_t b) +{ + return vpmin_s8(a, b); +} + +inline qint8x8_t vadd_qs8(qint8x8_t a, qint8x8_t b) +{ + return vadd_s8(a, b); +} + +inline qint8x16_t vaddq_qs8(qint8x16_t a, qint8x16_t b) +{ + return vaddq_s8(a, b); +} + +inline qint8x8_t vqadd_qs8(qint8x8_t a, qint8x8_t b) +{ + return vqadd_s8(a, b); +} + +inline qint8x16_t vqaddq_qs8(qint8x16_t a, qint8x16_t b) +{ + return vqaddq_s8(a, b); +} + +inline qint16x4_t vqadd_qs16(qint16x4_t a, qint16x4_t b) +{ + return vqadd_s16(a, b); +} + +inline qint16x8_t vqaddq_qs16(qint16x8_t a, qint16x8_t b) +{ + return vqaddq_s16(a, b); +} + +inline int16x4_t vpaddl_qs8(qint8x8_t a) +{ + return vpaddl_s8(a); +} + +inline qint8x8_t vsub_qs8(qint8x8_t a, qint8x8_t b) +{ + return vsub_s8(a, b); +} + +inline qint8x16_t vsubq_qs8(qint8x16_t a, qint8x16_t b) +{ + return vsubq_s8(a, b); +} + +inline qint8x8_t vqsub_qs8(qint8x8_t a, qint8x8_t b) +{ + return vqsub_s8(a, b); +} + +inline qint8x16_t vqsubq_qs8(qint8x16_t a, qint8x16_t b) +{ + return vqsubq_s8(a, b); +} + +inline qint8x8_t vmul_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position) +{ + const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position); + + // Initialize the temporary result with a constant used to round up the result + qint16x8_t res = vdupq_n_s16(1 << (fixed_point_position - 1)); + + // Vector multiply-accumulate long + res = vmlal_s8(res, a, b); + + // Shift right by fixed_point_position + res = vshlq_s16(res, fixed_point_position_s16); + + // Convert back to qint8 + return vmovn_s16(res); +} + +inline qint8x16_t vmulq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position) +{ + const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position); + + // Initialize the temporary results with a constant used to round up the result + qint16x8_t res0 = vdupq_n_s16(1 << (fixed_point_position - 1)); + qint16x8_t res1 = res0; + + // Vector multiply-accumulate long + res0 = vmlal_s8(res0, vget_low_s8(a), vget_low_s8(b)); + res1 = vmlal_s8(res1, vget_high_s8(a), vget_high_s8(b)); + + // Shift right by fixed_point_position + res0 = vshlq_s16(res0, fixed_point_position_s16); + res1 = vshlq_s16(res1, fixed_point_position_s16); + + // Convert back to qint8 + return vcombine_s8(vmovn_s16(res0), vmovn_s16(res1)); +} + +inline qint8x8_t vqmul_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position) +{ + const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position); + + // Initialize the temporary result with a constant used to round up the result + qint16x8_t res = vdupq_n_s16(1 << (fixed_point_position - 1)); + + // Vector multiply-accumulate long + res = vmlal_s8(res, a, b); + + // Shift right by fixed_point_position + res = vqshlq_s16(res, fixed_point_position_s16); + + // Convert back to qint8 and saturate + return vqmovn_s16(res); +} + +inline qint8x16_t vqmulq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position) +{ + const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position); + + // Initialize the temporary results with a constant used to round up the result + qint16x8_t res0 = vdupq_n_s16(1 << (fixed_point_position - 1)); + qint16x8_t res1 = res0; + + // Vector multiply-accumulate long + res0 = vmlal_s8(res0, vget_low_s8(a), vget_low_s8(b)); + res1 = vmlal_s8(res1, vget_high_s8(a), vget_high_s8(b)); + + // Shift right by fixed_point_position + res0 = vqshlq_s16(res0, fixed_point_position_s16); + res1 = vqshlq_s16(res1, fixed_point_position_s16); + + // Convert back to qint8 
and saturate + return vcombine_s8(vqmovn_s16(res0), vqmovn_s16(res1)); +} + +inline qint16x8_t vmull_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position) +{ + const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position); + + qint16x8_t res = vmull_s8(a, b); + + return vqrshlq_s16(res, fixed_point_position_s16); +} + +inline qint8x8_t vmla_qs8(qint8x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position) +{ + const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position); + + // Initialize the temporary results with a constant used to round up the result + qint16x8_t tmp = vdupq_n_s16(1 << (fixed_point_position - 1)); + + // Vector multiply-accumulate long + tmp = vmlal_s8(tmp, b, c); + + // Shift right by fixed_point_position + tmp = vshlq_s16(tmp, fixed_point_position_s16); + + // Convert back to qint8 and accumulate + return vadd_s8(a, vmovn_s16(tmp)); +} + +inline qint8x16_t vmlaq_qs8(qint8x16_t a, qint8x16_t b, qint8x16_t c, int fixed_point_position) +{ + const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position); + + // Initialize the temporary results with a constant used to round up the result + qint16x8_t tmp0 = vdupq_n_s16(1 << (fixed_point_position - 1)); + qint16x8_t tmp1 = tmp0; + + // Vector multiply-accumulate long + tmp0 = vmlal_s8(tmp0, vget_low_s8(b), vget_low_s8(c)); + tmp1 = vmlal_s8(tmp1, vget_high_s8(b), vget_high_s8(c)); + + // Shift right by fixed_point_position + tmp0 = vshlq_s16(tmp0, fixed_point_position_s16); + tmp1 = vshlq_s16(tmp1, fixed_point_position_s16); + + // Convert back to qint8 and accumulate + return vcombine_s8(vadd_s8(vget_low_s8(a), vmovn_s16(tmp0)), vadd_s8(vget_high_s8(a), vmovn_s16(tmp1))); +} + +inline qint8x8_t vqmla_qs8(qint8x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position) +{ + const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position); + + // Initialize the temporary results with a constant used to round up the result + qint16x8_t tmp = vdupq_n_s16(1 << (fixed_point_position - 1)); + + // Vector multiply-accumulate long + tmp = vmlal_s8(tmp, b, c); + + // Shift right by fixed_point_position + tmp = vqshlq_s16(tmp, fixed_point_position_s16); + + // Convert back to qint8 and accumulate + return vqadd_s8(a, vqmovn_s16(tmp)); +} + +inline qint8x16_t vqmlaq_qs8(qint8x16_t a, qint8x16_t b, qint8x16_t c, int fixed_point_position) +{ + const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position); + + // Initialize the temporary results with a constant used to round up the result + qint16x8_t tmp0 = vdupq_n_s16(1 << (fixed_point_position - 1)); + qint16x8_t tmp1 = tmp0; + + // Vector multiply-accumulate long + tmp0 = vmlal_s8(tmp0, vget_low_s8(b), vget_low_s8(c)); + tmp1 = vmlal_s8(tmp1, vget_high_s8(b), vget_high_s8(c)); + + // Shift right by fixed_point_position + tmp0 = vqshlq_s16(tmp0, fixed_point_position_s16); + tmp1 = vqshlq_s16(tmp1, fixed_point_position_s16); + + // Convert back to qint8 and accumulate + qint8x16_t res = vcombine_s8(vqmovn_s16(tmp0), vqmovn_s16(tmp1)); + return vqaddq_s8(a, res); +} + +inline qint16x8_t vmlal_qs8(qint16x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position) +{ + const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position); + + // Initialize the temporary results with a constant used to round up the result + qint16x8_t tmp = vdupq_n_s16(1 << (fixed_point_position - 1)); + + // Vector multiply-accumulate long + tmp = vmlal_s8(tmp, b, c); + + // Shift right by fixed_point_position + tmp = 
vshlq_s16(tmp, fixed_point_position_s16); + + // Accumulate + return vaddq_s16(a, tmp); +} + +inline qint16x8_t vqmlal_qs8(qint16x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position) +{ + const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position); + + // Initialize the temporary results with a constant used to round up the result + qint16x8_t tmp = vdupq_n_s16(1 << (fixed_point_position - 1)); + + // Vector multiply-accumulate long + tmp = vmlal_s8(tmp, b, c); + + // Shift right by fixed_point_position + tmp = vqshlq_s16(tmp, fixed_point_position_s16); + + // Accumulate + return vqaddq_s16(a, tmp); +} + +inline qint8x8_t vcvt_qs8_f32(const float32x4x2_t &a, int fixed_point_position) +{ + const float32x4_t pow2 = vdupq_n_f32(static_cast<float>(1 << fixed_point_position)); + + float32x4x2_t res_f32 = + { + { + vdupq_n_f32(0.5f), + vdupq_n_f32(0.5f) + } + }; + + res_f32.val[0] = vmlaq_f32(res_f32.val[0], a.val[0], pow2); + res_f32.val[1] = vmlaq_f32(res_f32.val[1], a.val[1], pow2); + + const int32x4x2_t res_s32 = + { + { + vcvtq_s32_f32(res_f32.val[0]), + vcvtq_s32_f32(res_f32.val[1]), + } + }; + + const int16x8_t res_s16 = vcombine_s16(vqmovn_s32(res_s32.val[0]), vqmovn_s32(res_s32.val[1])); + + return vqmovn_s16(res_s16); +} + +inline qint8x16_t vcvtq_qs8_f32(const float32x4x4_t &a, int fixed_point_position) +{ + const float32x4_t pow2 = vdupq_n_f32(static_cast<float>(1 << fixed_point_position)); + + float32x4x4_t res_f32 = + { + { + vdupq_n_f32(0.5f), + vdupq_n_f32(0.5f), + vdupq_n_f32(0.5f), + vdupq_n_f32(0.5f) + } + }; + + res_f32.val[0] = vmlaq_f32(res_f32.val[0], a.val[0], pow2); + res_f32.val[1] = vmlaq_f32(res_f32.val[1], a.val[1], pow2); + res_f32.val[2] = vmlaq_f32(res_f32.val[2], a.val[2], pow2); + res_f32.val[3] = vmlaq_f32(res_f32.val[3], a.val[3], pow2); + + const int32x4x4_t res_s32 = + { + { + vcvtq_s32_f32(res_f32.val[0]), + vcvtq_s32_f32(res_f32.val[1]), + vcvtq_s32_f32(res_f32.val[2]), + vcvtq_s32_f32(res_f32.val[3]), + } + }; + + const int16x8x2_t res_s16 = + { + { + vcombine_s16(vqmovn_s32(res_s32.val[0]), vqmovn_s32(res_s32.val[1])), + vcombine_s16(vqmovn_s32(res_s32.val[2]), vqmovn_s32(res_s32.val[3])), + } + }; + + return vcombine_s8(vqmovn_s16(res_s16.val[0]), vqmovn_s16(res_s16.val[1])); +} + +inline float32x4x2_t vcvt_f32_qs8(qint8x8_t a, int fixed_point_position) +{ + const float32x4_t pow2 = vdupq_n_f32(1.0f / (1 << fixed_point_position)); + + const int16x8_t res_s16 = vmovl_s8(a); + + const int32x4x2_t res_s32 = + { + { + vmovl_s16(vget_low_s16(res_s16)), + vmovl_s16(vget_high_s16(res_s16)) + } + }; + + float32x4x2_t res_f32 = + { + { + vcvtq_f32_s32(res_s32.val[0]), + vcvtq_f32_s32(res_s32.val[1]) + } + }; + + res_f32.val[0] = vmulq_f32(res_f32.val[0], pow2); + res_f32.val[1] = vmulq_f32(res_f32.val[1], pow2); + + return res_f32; +} + +inline float32x4x4_t vcvtq_f32_qs8(qint8x16_t a, int fixed_point_position) +{ + const float32x4_t pow2 = vdupq_n_f32(1.0f / (1 << fixed_point_position)); + + const int16x8x2_t res_s16 = + { + { + vmovl_s8(vget_low_s8(a)), + vmovl_s8(vget_high_s8(a)), + } + }; + + const int32x4x4_t res_s32 = + { + { + vmovl_s16(vget_low_s16(res_s16.val[0])), + vmovl_s16(vget_high_s16(res_s16.val[0])), + vmovl_s16(vget_low_s16(res_s16.val[1])), + vmovl_s16(vget_high_s16(res_s16.val[1])), + } + }; + + float32x4x4_t res_f32 = + { + { + vcvtq_f32_s32(res_s32.val[0]), + vcvtq_f32_s32(res_s32.val[1]), + vcvtq_f32_s32(res_s32.val[2]), + vcvtq_f32_s32(res_s32.val[3]) + } + }; + + res_f32.val[0] = vmulq_f32(res_f32.val[0], pow2); +
res_f32.val[1] = vmulq_f32(res_f32.val[1], pow2); + res_f32.val[2] = vmulq_f32(res_f32.val[2], pow2); + res_f32.val[3] = vmulq_f32(res_f32.val[3], pow2); + + return res_f32; +} + +inline qint8x8_t vrecip_qs8(qint8x8_t a, int fixed_point_position) +{ + // We need two bits to store 2, thus we can only support formats from Q2.5 to Q7.0 + const qint8x8_t const_48_over_17 = vdup_n_s8(0x7A >> (5 - fixed_point_position)); // 2.823 + const qint8x8_t const_minus_32_over_17 = vdup_n_s8(-(0x3C >> (5 - fixed_point_position))); // -1.8823 + const qint8x8_t const_one = vdup_n_s8(1 << fixed_point_position); + + // Find shift value + const qint8x8_t shift_value = vneg_s8(vsub_s8(vdup_n_s8(8), vadd_s8(vclz_s8(a), vdup_n_s8(fixed_point_position)))); + const qint8x8_t temp = vshl_s8(a, shift_value); + + qint8x8_t x = vadd_s8(const_48_over_17, vmul_qs8(temp, const_minus_32_over_17, fixed_point_position)); + + uint8x8_t set_one = vcgt_s8(x, const_one); + x = vbsl_s8(set_one, const_one, x); + + // Use three iterations of Newton-Raphson method to get the result + x = vadd_s8(x, vmul_qs8(x, vsub_s8(const_one, vmul_qs8(temp, x, fixed_point_position)), fixed_point_position)); + x = vadd_s8(x, vmul_qs8(x, vsub_s8(const_one, vmul_qs8(temp, x, fixed_point_position)), fixed_point_position)); + x = vadd_s8(x, vmul_qs8(x, vsub_s8(const_one, vmul_qs8(temp, x, fixed_point_position)), fixed_point_position)); + + return vshl_s8(x, shift_value); +} + +inline qint8x16_t vrecipq_qs8(qint8x16_t a, int fixed_point_position) +{ + // We need two bits to store 2, thus we can only support formats from Q2.5 to Q7.0 + const qint8x16_t const_48_over_17 = vdupq_n_s8(0x7A >> (5 - fixed_point_position)); // 2.823 + const qint8x16_t const_minus_32_over_17 = vdupq_n_s8((0x3C >> (5 - fixed_point_position))); // -1.8823 + const qint8x16_t const_one = vdupq_n_s8(1 << fixed_point_position); + + // Find shift value + const qint8x16_t shift_value = vnegq_s8(vsubq_s8(vdupq_n_s8(8), vaddq_s8(vclzq_s8(a), vdupq_n_s8(fixed_point_position)))); + const qint8x16_t temp = vshlq_s8(a, shift_value); + + qint8x16_t x = vsubq_qs8(const_48_over_17, vmulq_qs8(temp, const_minus_32_over_17, fixed_point_position)); + + // Set initial guess to one if x > 1 + uint8x16_t set_one = vcgtq_s8(x, const_one); + x = vbslq_s8(set_one, const_one, x); + + // Use three iterations of Newton-Raphson method to get the result + x = vaddq_s8(x, vmulq_qs8(x, vsubq_s8(const_one, vmulq_qs8(temp, x, fixed_point_position)), fixed_point_position)); + x = vaddq_s8(x, vmulq_qs8(x, vsubq_s8(const_one, vmulq_qs8(temp, x, fixed_point_position)), fixed_point_position)); + x = vaddq_s8(x, vmulq_qs8(x, vsubq_s8(const_one, vmulq_qs8(temp, x, fixed_point_position)), fixed_point_position)); + + return vshlq_s8(x, shift_value); +} + +inline qint8x16_t vqrecipq_qs8(qint8x16_t a, int fixed_point_position) +{ + // We need two bits to store 2, thus we can only support formats from Q2.5 to Q7.0 + const qint8x16_t const_48_over_17 = vdupq_n_s8(0x7A >> (5 - fixed_point_position)); // 2.823 + const qint8x16_t const_minus_32_over_17 = vdupq_n_s8((0x3C >> (5 - fixed_point_position))); // -1.8823 + const qint8x16_t const_one = vdupq_n_s8(1 << fixed_point_position); + + // Find shift value + const qint8x16_t shift_value = vqnegq_s8(vqsubq_s8(vdupq_n_s8(8), vqaddq_s8(vclzq_s8(a), vdupq_n_s8(fixed_point_position)))); + const qint8x16_t temp = vqshlq_s8(a, shift_value); + + qint8x16_t x = vqsubq_qs8(const_48_over_17, vmulq_qs8(temp, const_minus_32_over_17, fixed_point_position)); + + // Set initial guess to one if 
x > 1 + uint8x16_t set_one = vcgtq_s8(x, const_one); + x = vbslq_s8(set_one, const_one, x); + + // Use three iterations of Newton-Raphson method to get the result + x = vqaddq_s8(x, vqmulq_qs8(x, vqsubq_s8(const_one, vqmulq_qs8(temp, x, fixed_point_position)), fixed_point_position)); + x = vqaddq_s8(x, vqmulq_qs8(x, vqsubq_s8(const_one, vqmulq_qs8(temp, x, fixed_point_position)), fixed_point_position)); + x = vqaddq_s8(x, vqmulq_qs8(x, vqsubq_s8(const_one, vqmulq_qs8(temp, x, fixed_point_position)), fixed_point_position)); + + return vqshlq_s8(x, shift_value); +} + +inline qint8x8_t vdiv_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position) +{ + return vmul_qs8(a, vrecip_qs8(b, fixed_point_position), fixed_point_position); +} + +inline qint8x16_t vdivq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position) +{ + return vmulq_qs8(a, vrecipq_qs8(b, fixed_point_position), fixed_point_position); +} + +template <bool islog> +inline qint8x8_t vtaylor_poly_qs8(qint8x8_t a, int fixed_point_position) +{ + const qint8x8_t shift_value = vdup_n_s8(-(7 - fixed_point_position)); + const qint8x8_t const_one = vdup_n_s8(1); + const qint8x8_t A = vrshl_s8(islog ? log_tab_qs8[0] : exp_tab_qs8[0], islog ? vadd_s8(shift_value, const_one) : shift_value); + const qint8x8_t B = vrshl_s8(islog ? log_tab_qs8[1] : exp_tab_qs8[1], shift_value); + const qint8x8_t C = vrshl_s8(islog ? log_tab_qs8[2] : exp_tab_qs8[2], shift_value); + const qint8x8_t D = vrshl_s8(islog ? log_tab_qs8[3] : exp_tab_qs8[3], shift_value); + const qint8x8_t x1 = vadd_s8(vmul_qs8(a, D, fixed_point_position), C); + const qint8x8_t x2 = vadd_s8(vmul_qs8(a, x1, fixed_point_position), B); + const qint8x8_t x3 = vadd_s8(vmul_qs8(a, x2, fixed_point_position), A); + const qint8x8_t res = vmul_qs8(a, x3, fixed_point_position); + return res; +} + +template <bool islog> +inline qint8x8_t vqtaylor_poly_qs8(qint8x8_t a, int fixed_point_position) +{ + const qint8x8_t shift_value = vdup_n_s8(-(7 - fixed_point_position)); + const qint8x8_t const_one = vdup_n_s8(1); + const qint8x8_t A = vqrshl_s8(islog ? log_tab_qs8[0] : exp_tab_qs8[0], islog ? vqadd_s8(shift_value, const_one) : shift_value); + const qint8x8_t B = vqrshl_s8(islog ? log_tab_qs8[1] : exp_tab_qs8[1], shift_value); + const qint8x8_t C = vqrshl_s8(islog ? log_tab_qs8[2] : exp_tab_qs8[2], shift_value); + const qint8x8_t D = vqrshl_s8(islog ? log_tab_qs8[3] : exp_tab_qs8[3], shift_value); + const qint8x8_t x1 = vqadd_s8(vqmul_qs8(a, D, fixed_point_position), C); + const qint8x8_t x2 = vqadd_s8(vqmul_qs8(a, x1, fixed_point_position), B); + const qint8x8_t x3 = vqadd_s8(vqmul_qs8(a, x2, fixed_point_position), A); + const qint8x8_t res = vqmul_qs8(a, x3, fixed_point_position); + return res; +}
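+/* The polynomial evaluators above and below use Horner's scheme with a zero constant term: x1 = a*D + C, x2 = a*x1 + B, x3 = a*x2 + A, so res = a * (A + a * (B + a * (C + a * D))). A scalar sketch of the same evaluation (illustrative only, not part of the library): + * + * float horner4(float a, float A, float B, float C, float D) + * { + * return a * (A + a * (B + a * (C + a * D))); + * } + */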
+template <bool islog> +inline qint8x16_t vtaylor_polyq_qs8(qint8x16_t a, int fixed_point_position) +{ + const qint8x16_t shift_value = vdupq_n_s8(-(7 - fixed_point_position)); + const qint8x16_t const_one = vdupq_n_s8(1); + const qint8x16_t A = vrshlq_s8(islog ? log_tabq_qs8[0] : exp_tabq_qs8[0], islog ? vaddq_s8(shift_value, const_one) : shift_value); + const qint8x16_t B = vrshlq_s8(islog ? log_tabq_qs8[1] : exp_tabq_qs8[1], shift_value); + const qint8x16_t C = vrshlq_s8(islog ? log_tabq_qs8[2] : exp_tabq_qs8[2], shift_value); + const qint8x16_t D = vrshlq_s8(islog ? log_tabq_qs8[3] : exp_tabq_qs8[3], shift_value); + const qint8x16_t x1 = vaddq_s8(vmulq_qs8(a, D, fixed_point_position), C); + const qint8x16_t x2 = vaddq_s8(vmulq_qs8(a, x1, fixed_point_position), B); + const qint8x16_t x3 = vaddq_s8(vmulq_qs8(a, x2, fixed_point_position), A); + const qint8x16_t res = vmulq_qs8(a, x3, fixed_point_position); + return res; +} + +template <bool islog> +inline qint8x16_t vqtaylor_polyq_qs8(qint8x16_t a, int fixed_point_position) +{ + const qint8x16_t shift_value = vdupq_n_s8(-(7 - fixed_point_position)); + const qint8x16_t const_one = vdupq_n_s8(1); + const qint8x16_t A = vqrshlq_s8(islog ? log_tabq_qs8[0] : exp_tabq_qs8[0], islog ? vqaddq_s8(shift_value, const_one) : shift_value); + const qint8x16_t B = vqrshlq_s8(islog ? log_tabq_qs8[1] : exp_tabq_qs8[1], shift_value); + const qint8x16_t C = vqrshlq_s8(islog ? log_tabq_qs8[2] : exp_tabq_qs8[2], shift_value); + const qint8x16_t D = vqrshlq_s8(islog ? log_tabq_qs8[3] : exp_tabq_qs8[3], shift_value); + const qint8x16_t x1 = vqaddq_s8(vqmulq_qs8(a, D, fixed_point_position), C); + const qint8x16_t x2 = vqaddq_s8(vqmulq_qs8(a, x1, fixed_point_position), B); + const qint8x16_t x3 = vqaddq_s8(vqmulq_qs8(a, x2, fixed_point_position), A); + const qint8x16_t res = vqmulq_qs8(a, x3, fixed_point_position); + return res; +} + +inline qint8x8_t vqexp_qs8(qint8x8_t a, int fixed_point_position) +{ + const qint8x8_t shift_value = vdup_n_s8(fixed_point_position - 7); + const qint8x8_t const_one = vdup_n_s8(1 << fixed_point_position); + const qint8x8_t const_ln2 = vqrshl_s8(vdup_n_s8(0x58), shift_value); // ln(2) + const qint8x8_t const_inv_ln2 = vorr_s8(vqrshl_s8(vdup_n_s8(0x38), shift_value), const_one); // 1/ln(2) + + // Perform range reduction [-log(2),log(2)] + const qint8x8_t m = vqmul_qs8(a, const_inv_ln2, fixed_point_position); // x / ln(2) + + // Get the decimal part of m + const qint8x8_t dec_m = vqshl_s8(m, vdup_n_s8(-fixed_point_position)); + + qint8x8_t alpha = vqmul_qs8(vqshl_s8(dec_m, vdup_n_s8(fixed_point_position)), const_ln2, fixed_point_position); + alpha = vqabs_qs8(vqsub_s8(a, alpha)); + + // Polynomial Approximation + qint8x8_t poly = vqtaylor_poly_qs8<false>(alpha, fixed_point_position); + poly = vqadd_s8(poly, const_one); + + // Reconstruct + poly = vqshl_s8(poly, dec_m); + + return poly; +} + +inline qint8x16_t vqexpq_qs8(qint8x16_t a, int fixed_point_position) +{ + const qint8x16_t shift_value = vdupq_n_s8(fixed_point_position - 7); + const qint8x16_t const_one = vdupq_n_s8(1 << fixed_point_position); + const qint8x16_t const_ln2 = vqrshlq_s8(vdupq_n_s8(0x58), shift_value); // ln(2) + const qint8x16_t const_inv_ln2 = vorrq_s8(vqrshlq_s8(vdupq_n_s8(0x38), shift_value), const_one); // 1/ln(2) + + // Perform range reduction [-log(2),log(2)] + const qint8x16_t m = vqmulq_qs8(a, const_inv_ln2, fixed_point_position); // x / ln(2) + + // Get the decimal part of m + const qint8x16_t dec_m = vqshlq_s8(m, vdupq_n_s8(-fixed_point_position)); + + qint8x16_t alpha = vqmulq_qs8(vqshlq_s8(dec_m, vdupq_n_s8(fixed_point_position)), const_ln2, fixed_point_position); + alpha = vqabsq_qs8(vqsubq_qs8(a, alpha)); + + // Polynomial Approximation + qint8x16_t poly = vqtaylor_polyq_qs8<false>(alpha, fixed_point_position); + poly = vqaddq_s8(poly, const_one); + + // Reconstruct + poly = vqshlq_s8(poly, dec_m); + + return poly; +}
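+/* vqexp_qs8() and vqexpq_qs8() rely on the classic range reduction e^x = 2^m * e^alpha, where m is the integer part of x / ln(2) and alpha the remaining fraction; the polynomial approximates e^alpha and the final left shift by m reconstructs the 2^m factor. In scalar floating point terms (an illustrative sketch, assuming <cmath>): + * + * float exp_by_range_reduction(float x) + * { + * const int m = static_cast<int>(std::floor(x / 0.6931471805f)); // x / ln(2) + * const float alpha = x - m * 0.6931471805f; // in [0, ln(2)) + * return std::ldexp(std::exp(alpha), m); // e^x = 2^m * e^alpha + * } + */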
+inline qint8x8_t vlog_qs8(qint8x8_t a, int fixed_point_position) +{ + const qint8x8_t const_one = vdup_n_s8(1 << fixed_point_position); + const qint8x8_t const_seven_dec = vdup_n_s8(7); + const qint8x8_t const_ln2 = vdup_n_s8(0x58 >> (7 - fixed_point_position)); // ln(2) + + // If 0 < a < 1, calculate log(1/x) + uint8x8_t calc_reciprocal = vclt_s8(a, const_one); + qint8x8_t recip = vdup_n_s8(0); + recip = vbsl_s8(calc_reciprocal, a, recip); + + // Calculate reciprocal + recip = vrecip_qs8(recip, fixed_point_position); + a = vbsl_s8(calc_reciprocal, recip, a); + + // Get decimal part of a + qint8x8_t shift_value = vdup_n_s8(-fixed_point_position); + qint8x8_t dec_a = vshl_s8(a, shift_value); // a >> fixed_point_position + + // Get exponent of 2^n which is equal to or less than dec_a + shift_value = vsub_s8(const_seven_dec, vclz_s8(dec_a)); + + // Get x to range (1, 2] + const qint8x8_t shift_value_neg = vneg_s8(shift_value); + const qint8x8_t temp = vsub_s8(vrshl_s8(a, shift_value_neg), const_one); + const qint8x8_t sum = vmul_s8(shift_value, const_one); + + // Polynomial Approximation + qint8x8_t poly = vtaylor_poly_qs8<true>(temp, fixed_point_position); + + // Reconstruct + poly = vmul_qs8(vadd_s8(poly, sum), const_ln2, fixed_point_position); + + // Set negative value for 0 < a < 1 + poly = vbsl_s8(calc_reciprocal, vneg_s8(poly), poly); + + return poly; +} + +inline qint8x16_t vlogq_qs8(qint8x16_t a, int fixed_point_position) +{ + const qint8x16_t const_one = vdupq_n_s8(1 << fixed_point_position); + const qint8x16_t const_seven_dec = vdupq_n_s8(7); + const qint8x16_t const_ln2 = vdupq_n_s8(0x58 >> (7 - fixed_point_position)); // ln(2) + + // If 0 < a < 1, calculate log(1/x) + uint8x16_t calc_reciprocal = vcltq_s8(a, const_one); + qint8x16_t recip = vdupq_n_s8(0); + recip = vbslq_s8(calc_reciprocal, a, recip); + + // Calculate reciprocal + recip = vrecipq_qs8(recip, fixed_point_position); + a = vbslq_s8(calc_reciprocal, recip, a); + + // Get decimal part of a + qint8x16_t shift_value = vdupq_n_s8(-fixed_point_position); + qint8x16_t dec_a = vshlq_s8(a, shift_value); // a >> fixed_point_position + + // Get exponent of 2^n which is equal to or less than dec_a + shift_value = vsubq_s8(const_seven_dec, vclzq_s8(dec_a)); + + // Get x to range (1, 2] + const qint8x16_t shift_value_neg = vnegq_s8(shift_value); + const qint8x16_t temp = vsubq_s8(vrshlq_s8(a, shift_value_neg), const_one); + const qint8x16_t sum = vmulq_s8(shift_value, const_one); + + // Polynomial Approximation + qint8x16_t poly = vtaylor_polyq_qs8<true>(temp, fixed_point_position); + + // Reconstruct + poly = vmulq_qs8(vaddq_s8(poly, sum), const_ln2, fixed_point_position); + + // Set negative value for 0 < a < 1 + poly = vbslq_s8(calc_reciprocal, vnegq_s8(poly), poly); + + return poly; +} + +inline qint8x8_t vinvsqrt_qs8(qint8x8_t a, int fixed_point_position) +{ + const qint8x8_t const_three = vdup_n_s8(3 << fixed_point_position); + + // Find shift value. Number must be in (0.5, 2) range.
+ qint8x8_t shift_value = vneg_s8(vsub_s8(vdup_n_s8(8), vadd_s8(vclz_s8(a), vdup_n_s8(fixed_point_position)))); + + // Add one when the shift value is negative in order to get the correct result when we shift right with 1 + qint8x8_t temp = vsub_s8(vdup_n_s8(8), vadd_s8(vclz_s8(a), vdup_n_s8(fixed_point_position))); + uint8x8_t temp_ltz = vclt_s8(temp, vdup_n_qs8(0)); + temp = vbsl_s8(temp_ltz, vadd_s8(temp, vdup_n_s8(1)), temp); + qint8x8_t shift_value2 = vneg_s8(vshr_n_s8(temp, 1)); + + temp = vshl_s8(a, shift_value); + + // Initial guess + qint8x8_t x = temp; + + // Calculate (x / 2) * (3 - a * x^2) + // After three iterations we have the result for 8 bit + x = vshr_n_s8(vmul_qs8(x, vsub_s8(const_three, vmul_qs8(temp, vmul_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + x = vshr_n_s8(vmul_qs8(x, vsub_s8(const_three, vmul_qs8(temp, vmul_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + x = vshr_n_s8(vmul_qs8(x, vsub_s8(const_three, vmul_qs8(temp, vmul_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + + return vshl_s8(x, shift_value2); +} + +inline qint8x8_t vqinvsqrt_qs8(qint8x8_t a, int fixed_point_position) +{ + const qint8x8_t const_three = vdup_n_s8(3 << fixed_point_position); + + // Find shift value. Number must be in (0.5, 2) range. + qint8x8_t shift_value = vneg_s8(vqsub_s8(vdup_n_s8(8), vadd_s8(vclz_s8(a), vdup_n_s8(fixed_point_position)))); + + // Add one when the shift value is negative in order to get the correct result when we shift right with 1 + qint8x8_t temp = vsub_s8(vdup_n_s8(8), vadd_s8(vclz_s8(a), vdup_n_s8(fixed_point_position))); + uint8x8_t temp_ltz = vclt_s8(temp, vdup_n_qs8(0)); + temp = vbsl_s8(temp_ltz, vadd_s8(temp, vdup_n_s8(1)), temp); + qint8x8_t shift_value2 = vneg_s8(vshr_n_s8(temp, 1)); + + temp = vshl_s8(a, shift_value); + + // Initial guess + qint8x8_t x = temp; + + // Calculate (x / 2) * (3 - a * x^2) + // After three iterations we have the result for 8 bit + x = vshr_n_s8(vqmul_qs8(x, vqsub_s8(const_three, vqmul_qs8(temp, vqmul_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + x = vshr_n_s8(vqmul_qs8(x, vqsub_s8(const_three, vqmul_qs8(temp, vqmul_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + x = vshr_n_s8(vqmul_qs8(x, vqsub_s8(const_three, vqmul_qs8(temp, vqmul_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + + return vshl_s8(x, shift_value2); +} + +inline qint8x16_t vinvsqrtq_qs8(qint8x16_t a, int fixed_point_position) +{ + const qint8x16_t const_three = vdupq_n_s8(3 << fixed_point_position); + + // Find shift value. Number must be in (0.5, 2) range. 
+ qint8x16_t shift_value = vnegq_s8(vsubq_s8(vdupq_n_s8(8), vaddq_s8(vclzq_s8(a), vdupq_n_s8(fixed_point_position)))); + + // Add one when the shift value is negative in order to get the correct result when we shift right with 1 + qint8x16_t temp = vsubq_s8(vdupq_n_s8(8), vaddq_s8(vclzq_s8(a), vdupq_n_s8(fixed_point_position))); + uint8x16_t temp_ltz = vcltq_s8(temp, vdupq_n_qs8(0)); + temp = vbslq_s8(temp_ltz, vaddq_s8(temp, vdupq_n_s8(1)), temp); + qint8x16_t shift_value2 = vnegq_s8(vshrq_n_s8(temp, 1)); + + temp = vshlq_s8(a, shift_value); + + // Initial guess + qint8x16_t x = temp; + + // Calculate (x / 2) * (3 - a * x^2) + // After three iterations we have the result for 8 bit + x = vshrq_n_s8(vmulq_qs8(x, vsubq_s8(const_three, vmulq_qs8(temp, vmulq_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + x = vshrq_n_s8(vmulq_qs8(x, vsubq_s8(const_three, vmulq_qs8(temp, vmulq_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + x = vshrq_n_s8(vmulq_qs8(x, vsubq_s8(const_three, vmulq_qs8(temp, vmulq_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + + return vshlq_s8(x, shift_value2); +} + +inline qint8x16_t vqinvsqrtq_qs8(qint8x16_t a, int fixed_point_position) +{ + const qint8x16_t const_three = vdupq_n_s8(3 << fixed_point_position); + + // Find shift value. Number must be in (0.5, 2) range. + qint8x16_t shift_value = vnegq_s8(vqsubq_s8(vdupq_n_s8(8), vaddq_s8(vclzq_s8(a), vdupq_n_s8(fixed_point_position)))); + + // Add one when the shift value is negative in order to get the correct result when we shift right with 1 + qint8x16_t temp = vsubq_s8(vdupq_n_s8(8), vaddq_s8(vclzq_s8(a), vdupq_n_s8(fixed_point_position))); + uint8x16_t temp_ltz = vcltq_s8(temp, vdupq_n_qs8(0)); + temp = vbslq_s8(temp_ltz, vaddq_s8(temp, vdupq_n_s8(1)), temp); + qint8x16_t shift_value2 = vnegq_s8(vshrq_n_s8(temp, 1)); + + temp = vshlq_s8(a, shift_value); + + // Initial guess + qint8x16_t x = temp; + + // Calculate (x / 2) * (3 - a * x^2) + // After three iterations we have the result for 8 bit + x = vshrq_n_s8(vqmulq_qs8(x, vqsubq_s8(const_three, vqmulq_qs8(temp, vqmulq_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + x = vshrq_n_s8(vqmulq_qs8(x, vqsubq_s8(const_three, vqmulq_qs8(temp, vqmulq_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + x = vshrq_n_s8(vqmulq_qs8(x, vqsubq_s8(const_three, vqmulq_qs8(temp, vqmulq_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + + return vshlq_s8(x, shift_value2); +} + +inline qint8x8_t vtanh_qs8(qint8x8_t a, int fixed_point_position) +{ + const qint8x8_t const_one = vdup_n_s8(1 << fixed_point_position); + const qint8x8_t const_two = vdup_n_s8(2 << fixed_point_position); + + qint8x8_t exp2x = vqexp_qs8(vqmul_qs8(const_two, a, fixed_point_position), fixed_point_position); + qint8x8_t num = vqsub_qs8(exp2x, const_one); + qint8x8_t den = vqadd_qs8(exp2x, const_one); + qint8x8_t tanh = vqmul_qs8(num, vrecip_qs8(den, fixed_point_position), fixed_point_position); + + return tanh; +} + +inline qint8x16_t vtanhq_qs8(qint8x16_t a, int fixed_point_position) +{ + const qint8x16_t const_one = vdupq_n_s8(1 << fixed_point_position); + const qint8x16_t const_two = vdupq_n_s8(2 << fixed_point_position); + + qint8x16_t exp2x = vqexpq_qs8(vqmulq_qs8(const_two, a, fixed_point_position), fixed_point_position); + qint8x16_t num = vqsubq_qs8(exp2x, const_one); + qint8x16_t den = 
vqaddq_qs8(exp2x, const_one); + qint8x16_t tanh = vqmulq_qs8(num, vqrecipq_qs8(den, fixed_point_position), fixed_point_position); + + return tanh; +} + +inline qint8x16_t vqpowq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position) +{ + return vqexpq_qs8(vqmulq_qs8(b, vlogq_qs8(a, fixed_point_position), fixed_point_position), fixed_point_position); +} +} diff --git a/arm_compute/core/NEON/NEKernels.h b/arm_compute/core/NEON/NEKernels.h new file mode 100644 index 0000000000..eaa50f123b --- /dev/null +++ b/arm_compute/core/NEON/NEKernels.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEKERNELS_H__ +#define __ARM_COMPUTE_NEKERNELS_H__ + +/* Header regrouping all the NEON kernels */ +#include "arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h" +#include "arm_compute/core/NEON/kernels/NEAccumulateKernel.h" +#include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h" +#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h" +#include "arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h" +#include "arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h" +#include "arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h" +#include "arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h" +#include "arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h" +#include "arm_compute/core/NEON/kernels/NEBitwiseXorKernel.h" +#include "arm_compute/core/NEON/kernels/NEBox3x3Kernel.h" +#include "arm_compute/core/NEON/kernels/NECannyEdgeKernel.h" +#include "arm_compute/core/NEON/kernels/NEChannelCombineKernel.h" +#include "arm_compute/core/NEON/kernels/NEChannelExtractKernel.h" +#include "arm_compute/core/NEON/kernels/NECol2ImKernel.h" +#include "arm_compute/core/NEON/kernels/NEColorConvertKernel.h" +#include "arm_compute/core/NEON/kernels/NEConvolutionKernel.h" +#include "arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h" +#include "arm_compute/core/NEON/kernels/NEDepthConcatenateKernel.h" +#include "arm_compute/core/NEON/kernels/NEDepthConvertKernel.h" +#include "arm_compute/core/NEON/kernels/NEDerivativeKernel.h" +#include "arm_compute/core/NEON/kernels/NEDilateKernel.h" +#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h" +#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h" +#include "arm_compute/core/NEON/kernels/NEErodeKernel.h" +#include 
"arm_compute/core/NEON/kernels/NEFastCornersKernel.h" +#include "arm_compute/core/NEON/kernels/NEFillArrayKernel.h" +#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "arm_compute/core/NEON/kernels/NEFillInnerBorderKernel.h" +#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" +#include "arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h" +#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h" +#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h" +#include "arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" +#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" +#include "arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h" +#include "arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h" +#include "arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h" +#include "arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h" +#include "arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h" +#include "arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h" +#include "arm_compute/core/NEON/kernels/NEHistogramKernel.h" +#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h" +#include "arm_compute/core/NEON/kernels/NEIntegralImageKernel.h" +#include "arm_compute/core/NEON/kernels/NELKTrackerKernel.h" +#include "arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h" +#include "arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h" +#include "arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h" +#include "arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h" +#include "arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h" +#include "arm_compute/core/NEON/kernels/NENonLinearFilterKernel.h" +#include "arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h" +#include "arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h" +#include "arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h" +#include "arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h" +#include "arm_compute/core/NEON/kernels/NERemapKernel.h" +#include "arm_compute/core/NEON/kernels/NEScaleKernel.h" +#include "arm_compute/core/NEON/kernels/NEScharr3x3Kernel.h" +#include "arm_compute/core/NEON/kernels/NESobel3x3Kernel.h" +#include "arm_compute/core/NEON/kernels/NESobel5x5Kernel.h" +#include "arm_compute/core/NEON/kernels/NESobel7x7Kernel.h" +#include "arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h" +#include "arm_compute/core/NEON/kernels/NETableLookupKernel.h" +#include "arm_compute/core/NEON/kernels/NEThresholdKernel.h" +#include "arm_compute/core/NEON/kernels/NETransposeKernel.h" +#include "arm_compute/core/NEON/kernels/NEWarpKernel.h" +#include "arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h" + +#endif /* __ARM_COMPUTE_NEKERNELS_H__ */ diff --git a/arm_compute/core/NEON/NEMath.h b/arm_compute/core/NEON/NEMath.h new file mode 100644 index 0000000000..bb8a330c1e --- /dev/null +++ b/arm_compute/core/NEON/NEMath.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEMATH_H__ +#define __ARM_COMPUTE_NEMATH_H__ + +#include <arm_neon.h> +#include <array> + +namespace arm_compute +{ +/** Calculate inverse square root. + * + * @param[in] x Input value. + * + * @return The calculated inverse square root. + */ +float32x4_t vinvsqrtq_f32(float32x4_t x); + +/** Calculate reciprocal. + * + * @param[in] x Input value. + * + * @return The calculated reciprocal. + */ +float32x4_t vinvq_f32(float32x4_t x); + +/** Perform a 7th degree polynomial approximation using Estrin's method. + * + * @param[in] x Input vector value in F32 format. + * @param[in] coeffs Polynomial coefficients table. + * + * @return The calculated approximation. + */ +float32x4_t vtaylor_polyq_f32(float32x4_t x, const std::array<float32x4_t, 8> &coeffs); + +/** Calculate the exponential. + * + * @param[in] x Input vector value in F32 format. + * + * @return The calculated exponential. + */ +float32x4_t vexpq_f32(float32x4_t x); + +/** Calculate the natural logarithm. + * + * @param[in] x Input vector value in F32 format. + * + * @return The calculated logarithm. + */ +float32x4_t vlogq_f32(float32x4_t x); + +/** Calculate hyperbolic tangent. + * + * tanh(x) = (e^2x - 1)/(e^2x + 1) + * + * @note We clamp x to [-10,10] to avoid overflow issues; this matches the bounds used in the implementation. + * + * @param[in] val Input vector value in F32 format. + * + * @return The calculated hyperbolic tangent. + */ +float32x4_t vtanhq_f32(float32x4_t val); + +/** Calculate the n-th power of a number. + * + * pow(x,n) = e^(n*log(x)) + * + * @param[in] val Input vector value in F32 format. + * @param[in] n Power to raise the input to. + * + * @return The calculated power. + */ +float32x4_t vpowq_f32(float32x4_t val, float32x4_t n); +} +#include "arm_compute/core/NEON/NEMath.inl" +#endif /* __ARM_COMPUTE_NEMATH_H__ */
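[Editor's note] The declarations above are implemented in NEMath.inl, reproduced next. As a quick orientation, a minimal usage sketch (illustrative only, not part of the patch; it assumes the arm_compute headers are on the include path):

    #include <arm_neon.h>
    #include "arm_compute/core/NEON/NEMath.h"

    float32x4_t demo()
    {
        const float32x4_t x = vdupq_n_f32(0.5f);
        const float32x4_t e = arm_compute::vexpq_f32(x);  // each lane ~1.6487 (= e^0.5)
        const float32x4_t t = arm_compute::vtanhq_f32(x); // each lane ~0.4621 (= tanh(0.5))
        return vmulq_f32(e, t);                           // lane-wise product of the two results
    }

diff --git a/arm_compute/core/NEON/NEMath.inl b/arm_compute/core/NEON/NEMath.inl new file mode 100644 index 0000000000..a31a4c0dc5 --- /dev/null +++ b/arm_compute/core/NEON/NEMath.inl @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited.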
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +namespace arm_compute +{ +/* Exponent polynomial coefficients */ +const std::array<float32x4_t, 8> exp_tab = +{ + { + vdupq_n_f32(1.f), + vdupq_n_f32(0.0416598916054f), + vdupq_n_f32(0.500000596046f), + vdupq_n_f32(0.0014122662833f), + vdupq_n_f32(1.00000011921f), + vdupq_n_f32(0.00833693705499f), + vdupq_n_f32(0.166665703058f), + vdupq_n_f32(0.000195780929062f), + } +}; + +/* Logarithm polynomial coefficients */ +const std::array<float32x4_t, 8> log_tab = +{ + { + vdupq_n_f32(-2.29561495781f), + vdupq_n_f32(-2.47071170807f), + vdupq_n_f32(-5.68692588806f), + vdupq_n_f32(-0.165253549814f), + vdupq_n_f32(5.17591238022f), + vdupq_n_f32(0.844007015228f), + vdupq_n_f32(4.58445882797f), + vdupq_n_f32(0.0141278216615f), + } +}; + +inline float32x4_t vinvsqrtq_f32(float32x4_t x) +{ + float32x4_t sqrt_reciprocal = vrsqrteq_f32(x); + sqrt_reciprocal = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal); + sqrt_reciprocal = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal); + + return sqrt_reciprocal; +} + +inline float32x4_t vinvq_f32(float32x4_t x) +{ + float32x4_t recip = vrecpeq_f32(x); + recip = vmulq_f32(vrecpsq_f32(x, recip), recip); + recip = vmulq_f32(vrecpsq_f32(x, recip), recip); + return recip; +} + +inline float32x4_t vtaylor_polyq_f32(float32x4_t x, const std::array<float32x4_t, 8> &coeffs) +{ + float32x4_t A = vmlaq_f32(coeffs[0], coeffs[4], x); + float32x4_t B = vmlaq_f32(coeffs[2], coeffs[6], x); + float32x4_t C = vmlaq_f32(coeffs[1], coeffs[5], x); + float32x4_t D = vmlaq_f32(coeffs[3], coeffs[7], x); + float32x4_t x2 = vmulq_f32(x, x); + float32x4_t x4 = vmulq_f32(x2, x2); + float32x4_t res = vmlaq_f32(vmlaq_f32(A, B, x2), vmlaq_f32(C, D, x2), x4); + return res; +} + +inline float32x4_t vexpq_f32(float32x4_t x) +{ + static const float32x4_t CONST_LN2 = vdupq_n_f32(0.6931471805f); // ln(2) + static const float32x4_t CONST_INV_LN2 = vdupq_n_f32(1.4426950408f); // 1/ln(2) + + // Perform range reduction [-log(2),log(2)] + int32x4_t m = vcvtq_s32_f32(vmulq_f32(x, CONST_INV_LN2)); + float32x4_t val = vmlsq_f32(x, vcvtq_f32_s32(m), CONST_LN2); + + // Polynomial Approximation + float32x4_t poly = vtaylor_polyq_f32(val, exp_tab); + + // Reconstruct + poly = vreinterpretq_f32_s32(vaddq_s32(vreinterpretq_s32_f32(poly), vshlq_n_s32(m, 23))); + + return poly; +}
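[Editor's note] The "Reconstruct" step above exploits IEEE-754 layout: after the range reduction x = m*ln(2) + r, adding m to the 8-bit exponent field multiplies the value by 2^m, giving exp(x) = 2^m * exp(r). A scalar sketch of the same step (illustrative only, not part of the patch):

    #include <cstdint>
    #include <cstring>

    // exp(x) = 2^m * exp(r): scale poly (~exp(r)) by 2^m via the exponent bits.
    float reconstruct(float poly, int32_t m)
    {
        int32_t bits;
        std::memcpy(&bits, &poly, sizeof(bits)); // well-defined type punning
        bits += m << 23;                         // bit 23 is the lowest exponent bit
        std::memcpy(&poly, &bits, sizeof(poly));
        return poly;
    }

 +inline float32x4_t vlogq_f32(float32x4_t x) +{ + static const int32x4_t CONST_127 =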
vdupq_n_s32(127); // 127 + static const float32x4_t CONST_LN2 = vdupq_n_f32(0.6931471805f); // ln(2) + + // Extract exponent + int32x4_t m = vsubq_s32(vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_f32(x), 23)), CONST_127); + float32x4_t val = vreinterpretq_f32_s32(vsubq_s32(vreinterpretq_s32_f32(x), vshlq_n_s32(m, 23))); + + // Polynomial Approximation + float32x4_t poly = vtaylor_polyq_f32(val, log_tab); + + // Reconstruct + poly = vmlaq_f32(poly, vcvtq_f32_s32(m), CONST_LN2); + + return poly; +} + +inline float32x4_t vtanhq_f32(float32x4_t val) +{ + static const float32x4_t CONST_1 = vdupq_n_f32(1.f); + static const float32x4_t CONST_2 = vdupq_n_f32(2.f); + static const float32x4_t CONST_MIN_TANH = vdupq_n_f32(-10.f); + static const float32x4_t CONST_MAX_TANH = vdupq_n_f32(10.f); + + float32x4_t x = vminq_f32(vmaxq_f32(val, CONST_MIN_TANH), CONST_MAX_TANH); + float32x4_t exp2x = vexpq_f32(vmulq_f32(CONST_2, x)); + float32x4_t num = vsubq_f32(exp2x, CONST_1); + float32x4_t den = vaddq_f32(exp2x, CONST_1); + float32x4_t tanh = vmulq_f32(num, vinvq_f32(den)); + return tanh; +} + +inline float32x4_t vpowq_f32(float32x4_t val, float32x4_t n) +{ + return vexpq_f32(vmulq_f32(n, vlogq_f32(val))); +} +} \ No newline at end of file
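[Editor's note] Because vpowq_f32 is computed as e^(n*log(x)) and vlogq_f32 reads the exponent bits directly, lanes with a non-positive base yield meaningless results; callers must ensure x > 0. A usage sketch (illustrative only, not part of the patch):

    #include <arm_neon.h>
    #include "arm_compute/core/NEON/NEMath.h"

    float32x4_t cube_demo()
    {
        // 2^3 per lane; well-defined because the base is strictly positive
        return arm_compute::vpowq_f32(vdupq_n_f32(2.f), vdupq_n_f32(3.f)); // each lane ~8.0
    }

diff --git a/arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h b/arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h new file mode 100644 index 0000000000..9ef93ce67a --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.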
+ */ +#ifndef __ARM_COMPUTE_NEABSOLUTEDIFFERENCEKERNEL_H__ +#define __ARM_COMPUTE_NEABSOLUTEDIFFERENCEKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the absolute difference kernel + * + * Absolute difference is computed by: + * @f[ output(x,y) = | input1(x,y) - input2(x,y) | @f] + */ +class NEAbsoluteDifferenceKernel : public INEKernel +{ +public: + /** Default constructor */ + NEAbsoluteDifferenceKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAbsoluteDifferenceKernel(const NEAbsoluteDifferenceKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAbsoluteDifferenceKernel &operator=(const NEAbsoluteDifferenceKernel &) = delete; + /** Allow instances of this class to be moved */ + NEAbsoluteDifferenceKernel(NEAbsoluteDifferenceKernel &&) = default; + /** Allow instances of this class to be moved */ + NEAbsoluteDifferenceKernel &operator=(NEAbsoluteDifferenceKernel &&) = default; + /** Default destructor */ + ~NEAbsoluteDifferenceKernel() = default; + + /** Set the inputs and output tensors + * + * @param[in] input1 Source tensor. Data types supported: U8/S16 + * @param[in] input2 Source tensor. Data types supported: U8/S16 + * @param[out] output Destination tensor. Data types supported: U8/S16 + */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + /** Common signature for all the specialised absolute difference functions + * + * @param[in] input1 An input tensor. Data types supported: U8/S16. + * @param[in] input2 An input tensor. Data types supported: U8/S16. + * @param[out] output The output tensor. Data types supported: U8 (only if both inputs are U8), S16. + * @param[in] window Region on which to execute the kernel. + */ + using AbsDiffFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window); + + /** Absolute difference function to use for the particular tensor formats passed to configure() */ + AbsDiffFunction *_func; + const ITensor *_input1; + const ITensor *_input2; + ITensor *_output; +}; +} +#endif /* __ARM_COMPUTE_NEABSOLUTEDIFFERENCEKERNEL_H__ */
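[Editor's note] Each kernel in this patch follows the same two-step contract: configure() validates the tensors and selects the specialised function, then the runtime schedules run() over a window. A hedged usage sketch (illustrative only; NEScheduler belongs to the library's runtime, not to this patch, and tensor allocation is elided):

    #include "arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h"
    #include "arm_compute/runtime/NEON/NEScheduler.h"

    void run_absdiff(arm_compute::ITensor &in1, arm_compute::ITensor &in2, arm_compute::ITensor &out)
    {
        arm_compute::NEAbsoluteDifferenceKernel kernel;
        kernel.configure(&in1, &in2, &out); // picks the U8/S16 specialisation
        // Split the execution window across worker threads along Y
        arm_compute::NEScheduler::get().schedule(&kernel, arm_compute::Window::DimY);
    }

diff --git a/arm_compute/core/NEON/kernels/NEAccumulateKernel.h b/arm_compute/core/NEON/kernels/NEAccumulateKernel.h new file mode 100644 index 0000000000..df6d7b8891 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEAccumulateKernel.h @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.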
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEACCUMULATEKERNEL_H__ +#define __ARM_COMPUTE_NEACCUMULATEKERNEL_H__ + +#include "arm_compute/core/NEON/INESimpleKernel.h" + +#include <cstdint> + +namespace arm_compute +{ +class ITensor; + +/** Interface for the accumulate kernel + * + * Accumulation is computed by: + * @f[ accum(x,y) = accum(x,y) + input(x,y) @f] + */ +class NEAccumulateKernel : public INESimpleKernel +{ +public: + /** Set the input and accumulation tensors + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] accum Destination tensor. Data type supported: S16. + */ + void configure(const ITensor *input, ITensor *accum); + + // Inherited methods overridden: + void run(const Window &window) override; +}; + +/** Interface for the accumulate weighted kernel + * + * Weighted accumulation is computed: + * @f[ accum(x,y) = (1 - \alpha)*accum(x,y) + \alpha*input(x,y) @f] + * + * Where @f$ 0 \le \alpha \le 1 @f$ + * Conceptually, the rounding for this is defined as: + * @f[ output(x,y)= uint8( (1 - \alpha) * float32( int32( output(x,y) ) ) + \alpha * float32( int32( input(x,y) ) ) ) @f] +*/ +class NEAccumulateWeightedKernel : public INESimpleKernel +{ +public: + /** Default constructor */ + NEAccumulateWeightedKernel(); + /** Set the input and accumulation tensors, and the scale value + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[in] alpha Scalar value in the range [0.0f, 1.0f] + * @param[in,out] accum Accumulated tensor. Data type supported: U8. + */ + void configure(const ITensor *input, float alpha, ITensor *accum); + + // Inherited methods overridden: + void run(const Window &window) override; + +protected: + float _alpha; +}; + +#ifdef ARM_COMPUTE_ENABLE_FP16 +/** Interface for the accumulate weighted kernel using F16 */ +class NEAccumulateWeightedFP16Kernel : public NEAccumulateWeightedKernel +{ +public: + // Inherited methods overridden: + void run(const Window &window) override; +}; +#else +using NEAccumulateWeightedFP16Kernel = NEAccumulateWeightedKernel; +#endif + +/** Interface for the accumulate squared kernel + * + * The accumulation of squares is computed: + * @f[ accum(x,y) = saturate_{int16} ( (uint16) accum(x,y) + (((uint16)(input(x,y)^2)) >> (shift)) ) @f] + * + * Where @f$ 0 \le shift \le 15 @f$ +*/ +class NEAccumulateSquaredKernel : public INESimpleKernel +{ +public: + /** Default constructor */ + NEAccumulateSquaredKernel(); + /** Set the input and accumulation tensors and the shift value. + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[in] shift Shift value in the range [0, 15] + * @param[in,out] accum Accumulated tensor. Data type supported: S16. + */ + void configure(const ITensor *input, uint32_t shift, ITensor *accum); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + uint32_t _shift; +}; +} +#endif /*__ARM_COMPUTE_NEACCUMULATEKERNEL_H__ */
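[Editor's note] The conceptual rounding in the weighted-accumulation comment above is easier to read in scalar form; an equivalent per-pixel update (illustrative only, not part of the patch):

    #include <cstdint>

    // accum = (1 - alpha) * accum + alpha * input, computed in float and
    // truncated back to U8, mirroring the definition in the class comment.
    uint8_t accumulate_weighted(uint8_t accum, uint8_t input, float alpha)
    {
        const float blended = (1.f - alpha) * static_cast<float>(accum) + alpha * static_cast<float>(input);
        return static_cast<uint8_t>(blended);
    }

diff --git a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h new file mode 100644 index 0000000000..97f92d6a1e --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2017 ARM Limited.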
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEACTIVATIONLAYERKERNEL_H__ +#define __ARM_COMPUTE_NEACTIVATIONLAYERKERNEL_H__ + +#include "arm_compute/core/FixedPoint.h" +#include "arm_compute/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the activation layer kernel. */ +class NEActivationLayerKernel : public INESimpleKernel +{ +public: + /** Constructor */ + NEActivationLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEActivationLayerKernel(const NEActivationLayerKernel &) = delete; + /** Default move constructor */ + NEActivationLayerKernel(NEActivationLayerKernel &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEActivationLayerKernel &operator=(const NEActivationLayerKernel &) = delete; + /** Default move assignment operator */ + NEActivationLayerKernel &operator=(NEActivationLayerKernel &&) = default; + /** Set the input and output tensor. + * + * @param[in] input Source tensor. Data types supported: QS8/F32. + * @param[out] output Destination tensor. Data type supported: same as @p input + * @param[in] activation_info Activation layer information. + */ + void configure(const ITensor *input, ITensor *output, ActivationLayerInfo activation_info); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + using ActivationFunction = ActivationLayerInfo::ActivationFunction; + /** Common signature for all the specialised @ref NEActivationLayerKernel functions + * + * @param[in] window Region on which to execute the kernel. + */ + using ActivationFunctionExecutorPtr = void (NEActivationLayerKernel::*)(const Window &window); + /** Function to apply an activation function on a tensor. + * + * @param[in] window Region on which to execute the kernel + */ + template <ActivationFunction F, typename T> + typename std::enable_if<std::is_same<T, float>::value, void>::type activation(const Window &window); + /** Function to apply an activation function on a tensor. + * + * @param[in] window Region on which to execute the kernel + */ + template <ActivationFunction F, typename T> + typename std::enable_if<std::is_same<T, qint8_t>::value, void>::type activation(const Window &window); + +private: + ActivationFunctionExecutorPtr _func; + ActivationLayerInfo _act_info; +}; +} +#endif /*__ARM_COMPUTE_NEACTIVATIONLAYERKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h b/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h new file mode 100644 index 0000000000..b36ca46e1a --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEARITHMETICADDITIONKERNEL_H__ +#define __ARM_COMPUTE_NEARITHMETICADDITIONKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to perform addition between two tensors */ +class NEArithmeticAdditionKernel : public INEKernel +{ +public: + /** Default constructor */ + NEArithmeticAdditionKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEArithmeticAdditionKernel(const NEArithmeticAdditionKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEArithmeticAdditionKernel &operator=(const NEArithmeticAdditionKernel &) = delete; + /** Allow instances of this class to be moved */ + NEArithmeticAdditionKernel(NEArithmeticAdditionKernel &&) = default; + /** Allow instances of this class to be moved */ + NEArithmeticAdditionKernel &operator=(NEArithmeticAdditionKernel &&) = default; + /** Default destructor */ + ~NEArithmeticAdditionKernel() = default; + + /** Initialise the kernel's inputs, output and overflow policy. + * + * @param[in] input1 An input tensor. Data types supported: U8/S16/F32 + * @param[in] input2 An input tensor. Data types supported: U8/S16/F32 (only if @p input1 is F32). + * @param[out] output The output tensor. Data types supported: U8 (only if both inputs are U8), S16/F32 (only if both inputs are F32). + * @param[in] policy Overflow policy.
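+ * + * @note With ConvertPolicy::SATURATE a U8 addition clamps to the type range (e.g. 200 + 100 gives 255), while ConvertPolicy::WRAP wraps modulo 256 (200 + 100 gives 44).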
+ */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + /** Common signature for all the specialised add functions + * + * @param[in] input1 An input tensor. Data types supported: U8/S16/F32. + * @param[in] input2 An input tensor. Data types supported: U8/S16/F32 (only if @p input1 is F32). + * @param[out] output The output tensor. Data types supported: U8 (Only if both inputs are U8), S16/F32 (only if both inputs are F32). + * @param[in] window Region on which to execute the kernel. + */ + using AddFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window); + /** Add function to use for the particular tensor types passed to configure() */ + AddFunction *_func; + const ITensor *_input1; + const ITensor *_input2; + ITensor *_output; +}; +} +#endif /*__ARM_COMPUTE_NEARITHMETICADDITIONKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h b/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h new file mode 100644 index 0000000000..0eb9c23686 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEARITHMETICSUBTRACTIONKERNEL_H__ +#define __ARM_COMPUTE_NEARITHMETICSUBTRACTIONKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to perform subtraction between two tensors */ +class NEArithmeticSubtractionKernel : public INEKernel +{ +public: + /** Default constructor */ + NEArithmeticSubtractionKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEArithmeticSubtractionKernel(const NEArithmeticSubtractionKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEArithmeticSubtractionKernel &operator=(const NEArithmeticSubtractionKernel &) = delete; + /** Allow instances of this class to be moved */ + NEArithmeticSubtractionKernel(NEArithmeticSubtractionKernel &&) = default; + /** Allow instances of this class to be moved */ + NEArithmeticSubtractionKernel &operator=(NEArithmeticSubtractionKernel &&) = default; + /** Default destructor */ + ~NEArithmeticSubtractionKernel() = default; + + /** Initialise the kernel's inputs, output and overflow policy. + * + * @param[in] input1 An input tensor. Data types supported: U8/S16/F32 + * @param[in] input2 An input tensor. Data types supported: U8/S16/F32 (only if @p input1 is F32). + * @param[out] output The output tensor. Data types supported: U8 (only if both inputs are U8), S16/F32 (only if both inputs are F32). + * @param[in] policy Overflow policy. + */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + /** Common signature for all the specialised sub functions + * + * @param[in] input1 An input tensor. Data types supported: U8/S16/F32. + * @param[in] input2 An input tensor. Data types supported: U8/S16/F32 (only if @p input1 is F32). + * @param[out] output The output tensor. Data types supported: U8 (only if both inputs are U8), S16/F32 (only if both inputs are F32). + * @param[in] window Region on which to execute the kernel. + */ + using SubFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window); + /** Sub function to use for the particular tensor types passed to configure() */ + SubFunction *_func; + const ITensor *_input1; + const ITensor *_input2; + ITensor *_output; +}; +} +#endif /* __ARM_COMPUTE_NEARITHMETICSUBTRACTIONKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h new file mode 100644 index 0000000000..29fcbd26a0 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEBATCHNORMALIZATIONLAYERKERNEL_H__ +#define __ARM_COMPUTE_NEBATCHNORMALIZATIONLAYERKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the batch normalization layer kernel. + * + * The result is computed per feature map as: + * @f[ output(x,y) = \gamma \cdot \frac{input(x,y) - \mu}{\sqrt{\sigma^2 + \epsilon}} + \beta @f] + * + * where @f$ \mu @f$ and @f$ \sigma^2 @f$ are the mean and variance of the feature map. + */ +class NEBatchNormalizationLayerKernel : public INEKernel +{ +public: + /** Default constructor */ + NEBatchNormalizationLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBatchNormalizationLayerKernel(const NEBatchNormalizationLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBatchNormalizationLayerKernel &operator=(const NEBatchNormalizationLayerKernel &) = delete; + /** Default Move Constructor. */ + NEBatchNormalizationLayerKernel(NEBatchNormalizationLayerKernel &&) = default; + /** Default move assignment operator. */ + NEBatchNormalizationLayerKernel &operator=(NEBatchNormalizationLayerKernel &&) = default; + /** Default destructor */ + ~NEBatchNormalizationLayerKernel() = default; + /** Set the input and output tensors. + * + * @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, FM]. + * The rest are optional and used for representing batches. Data types supported: QS8/F32. + * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input + * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] beta Beta values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] gamma Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] epsilon Small value to avoid division with zero. + */ + void configure(const ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma, float epsilon); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + using BatchNormFunction = void(const ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma, float epsilon, const Window &window); + BatchNormFunction *_func; + const ITensor *_input; + ITensor *_output; + const ITensor *_mean; + const ITensor *_var; + const ITensor *_gamma; + const ITensor *_beta; + float _epsilon; +}; +} +#endif /*__ARM_COMPUTE_NEBATCHNORMALIZATIONLAYERKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h b/arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h new file mode 100644 index 0000000000..b931445419 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEBITWISEANDKERNEL_H__ +#define __ARM_COMPUTE_NEBITWISEANDKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to perform bitwise AND between XY-planes of two tensors + * + * Result is computed by: + * @f[ output(x,y) = input1(x,y) \land input2(x,y) @f] + */ +class NEBitwiseAndKernel : public INEKernel +{ +public: + /** Default constructor */ + NEBitwiseAndKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBitwiseAndKernel(const NEBitwiseAndKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBitwiseAndKernel &operator=(const NEBitwiseAndKernel &) = delete; + /** Allow instances of this class to be moved */ + NEBitwiseAndKernel(NEBitwiseAndKernel &&) = default; + /** Allow instances of this class to be moved */ + NEBitwiseAndKernel &operator=(NEBitwiseAndKernel &&) = default; + /** Initialise the kernel's inputs and output + * + * @param[in] input1 An input tensor. Data type supported: U8. + * @param[in] input2 An input tensor. Data type supported: U8. + * @param[out] output Output tensor. Data type supported: U8.
+ */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const ITensor *_input1; /**< Source tensor 1 */ + const ITensor *_input2; /**< Source tensor 2 */ + ITensor *_output; /**< Destination tensor */ +}; +} +#endif /* __ARM_COMPUTE_NEBITWISEANDKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h b/arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h new file mode 100644 index 0000000000..e34eb0f5ae --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEBITWISENOTKERNEL_H__ +#define __ARM_COMPUTE_NEBITWISENOTKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to perform bitwise NOT operation + * + * Result is computed by: + * @f[ output(x,y) = \lnot input(x,y) @f] + */ +class NEBitwiseNotKernel : public INEKernel +{ +public: + /** Default constructor */ + NEBitwiseNotKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBitwiseNotKernel(const NEBitwiseNotKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBitwiseNotKernel &operator=(const NEBitwiseNotKernel &) = delete; + /** Allow instances of this class to be moved */ + NEBitwiseNotKernel(NEBitwiseNotKernel &&) = default; + /** Allow instances of this class to be moved */ + NEBitwiseNotKernel &operator=(NEBitwiseNotKernel &&) = default; + /** Initialise the kernel's input and output + * + * @param[in] input An input tensor. Data type supported: U8. + * @param[out] output The output tensor. Data type supported: U8. 
+ */ + void configure(const ITensor *input, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const ITensor *_input; /**< Source tensor */ + ITensor *_output; /**< Destination tensor */ +}; +} +#endif /* __ARM_COMPUTE_NEBITWISENOTKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h b/arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h new file mode 100644 index 0000000000..d2bae2660c --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEBITWISEORKERNEL_H__ +#define __ARM_COMPUTE_NEBITWISEORKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to perform bitwise inclusive OR between two tensors + * + * Result is computed by: + * @f[ output(x,y) = input1(x,y) \lor input2(x,y) @f] + */ +class NEBitwiseOrKernel : public INEKernel +{ +public: + /** Default constructor */ + NEBitwiseOrKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBitwiseOrKernel(const NEBitwiseOrKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBitwiseOrKernel &operator=(const NEBitwiseOrKernel &) = delete; + /** Allow instances of this class to be moved */ + NEBitwiseOrKernel(NEBitwiseOrKernel &&) = default; + /** Allow instances of this class to be moved */ + NEBitwiseOrKernel &operator=(NEBitwiseOrKernel &&) = default; + /** Initialise the kernel's inputs and output. + * + * @param[in] input1 An input tensor. Data type supported: U8. + * @param[in] input2 An input tensor. Data type supported: U8 + * @param[out] output Output tensor. Data type supported: U8. 
+ */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const ITensor *_input1; /**< Source tensor 1 */ + const ITensor *_input2; /**< Source tensor 2 */ + ITensor *_output; /**< Destination tensor */ +}; +} +#endif /* __ARM_COMPUTE_NEBITWISEORKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEBitwiseXorKernel.h b/arm_compute/core/NEON/kernels/NEBitwiseXorKernel.h new file mode 100644 index 0000000000..9dea36e7e3 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEBitwiseXorKernel.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEBITWISEXORKERNEL_H__ +#define __ARM_COMPUTE_NEBITWISEXORKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to perform bitwise exclusive OR (XOR) between two tensors + * + * Result is computed by: + * @f[ output(x,y) = input1(x,y) \oplus input2(x,y) @f] + */ +class NEBitwiseXorKernel : public INEKernel +{ +public: + /** Default constructor */ + NEBitwiseXorKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBitwiseXorKernel(const NEBitwiseXorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBitwiseXorKernel &operator=(const NEBitwiseXorKernel &) = delete; + /** Allow instances of this class to be moved */ + NEBitwiseXorKernel(NEBitwiseXorKernel &&) = default; + /** Allow instances of this class to be moved */ + NEBitwiseXorKernel &operator=(NEBitwiseXorKernel &&) = default; + /** Initialise the kernel's inputs and output. + * + * @param[in] input1 An input tensor. Data type supported: U8. + * @param[in] input2 An input tensor. Data type supported: U8. + * @param[out] output The output tensor. Data type supported: U8.
+ */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const ITensor *_input1; /**< Source tensor 1 */ + const ITensor *_input2; /**< Source tensor 2 */ + ITensor *_output; /**< Destination tensor */ +}; +} +#endif /* __ARM_COMPUTE_NEBITWISEXORKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEBox3x3Kernel.h b/arm_compute/core/NEON/kernels/NEBox3x3Kernel.h new file mode 100644 index 0000000000..6b7bebbf17 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEBox3x3Kernel.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEBOX3x3KERNEL_H__ +#define __ARM_COMPUTE_NEBOX3x3KERNEL_H__ + +#include "arm_compute/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform a Box 3x3 filter */ +class NEBox3x3Kernel : public INESimpleKernel +{ +public: + /** Set the source, destination and border mode of the kernel + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output Destination tensor. Data type supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ITensor *input, ITensor *output, bool border_undefined); + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; +}; + +#ifdef ARM_COMPUTE_ENABLE_FP16 +/** NEON kernel to perform a Box 3x3 filter using F16 simd + */ +class NEBox3x3FP16Kernel : public NEBox3x3Kernel +{ +public: + // Inherited methods overridden: + void run(const Window &window) override; +}; +#else +using NEBox3x3FP16Kernel = NEBox3x3Kernel; +#endif +} +#endif /*__ARM_COMPUTE_NEBOX3x3KERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h b/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h new file mode 100644 index 0000000000..b86085f439 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h @@ -0,0 +1,190 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NECANNYEDGEKERNEL_H__ +#define __ARM_COMPUTE_NECANNYEDGEKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +#include <cstdint> + +namespace arm_compute +{ +class ITensor; + +/** Computes magnitude and quantised phase from input gradients. */ +class NEGradientKernel : public INEKernel +{ +public: + /** Default constructor */ + NEGradientKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGradientKernel(const NEGradientKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGradientKernel &operator=(const NEGradientKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGradientKernel(NEGradientKernel &&) = default; + /** Allow instances of this class to be moved */ + NEGradientKernel &operator=(NEGradientKernel &&) = default; + /** Default destructor */ + virtual ~NEGradientKernel() = default; + + /** Initialise the kernel's sources, destinations and border mode. + * + * @note gx, gy and magnitude must all use the same data size (either 16-bit or 32-bit) + * + * @param[in] gx Source tensor - Gx component. Data type supported: S16/S32. + * @param[in] gy Source tensor - Gy component. Data type supported: same as @p gx. + * @param[out] magnitude Destination tensor - Magnitude. Data type supported: U16 (if the data type of @p gx is S16) / U32 (if the data type of @p gx is S32). + * @param[out] phase Destination tensor - Quantized phase. Data type supported: U8. + * @param[in] norm_type Normalization type. If 1, the L1 norm is used (|gx| + |gy|), otherwise the L2 norm (sqrt(gx^2 + gy^2)). + */ + virtual void configure(const ITensor *gx, const ITensor *gy, ITensor *magnitude, ITensor *phase, int32_t norm_type); + + // Inherited methods overridden: + void run(const Window &window) override; + +protected: + /** Common signature for all the specialised gradient functions + * + * @param[in] gx_ptr Pointer to the first input tensor. + * @param[in] gy_ptr Pointer to the second input tensor. + * @param[out] magnitude_ptr Pointer to the first output tensor + * @param[out] phase_ptr Pointer to the second output tensor + */ + using GradientFunction = void(const void *__restrict gx_ptr, const void *__restrict gy_ptr, void *__restrict magnitude_ptr, void *__restrict phase_ptr); + + GradientFunction *_func; /**< Gradient function to use for the particular tensor types passed to configure() */ + const ITensor *_gx; /**< Source tensor - Gx component */ + const ITensor *_gy; /**< Source tensor - Gy component */ + ITensor *_magnitude; /**< Destination tensor - Magnitude */ + ITensor *_phase; /**< Destination tensor - Quantized phase */ +}; + +#ifdef ARM_COMPUTE_ENABLE_FP16 +/** NEON kernel to perform Gradient computation + */ +class NEGradientFP16Kernel : public NEGradientKernel +{ +public: + // Inherited methods overridden: + void configure(const ITensor *gx, const ITensor *gy, ITensor *magnitude, ITensor *phase, int32_t norm_type) override; +}; +#else /* ARM_COMPUTE_ENABLE_FP16 */ +using NEGradientFP16Kernel = NEGradientKernel; +#endif /* ARM_COMPUTE_ENABLE_FP16 */ + +/** NEON kernel to perform Non-Maxima suppression for Canny Edge. + * + * @note This kernel is meant to be used alongside CannyEdge and performs a non-maxima suppression using magnitude and phase of input + * to characterize points as possible edges. Thus, at the end, each point will be set to EDGE, NO_EDGE or MAYBE. + * + * @note Hysteresis is computed in @ref NEEdgeTraceKernel + */ +class NEEdgeNonMaxSuppressionKernel : public INEKernel +{ +public: + /** Default constructor */ + NEEdgeNonMaxSuppressionKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEEdgeNonMaxSuppressionKernel(const NEEdgeNonMaxSuppressionKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEEdgeNonMaxSuppressionKernel &operator=(const NEEdgeNonMaxSuppressionKernel &) = delete; + /** Allow instances of this class to be moved */ + NEEdgeNonMaxSuppressionKernel(NEEdgeNonMaxSuppressionKernel &&) = default; + /** Allow instances of this class to be moved */ + NEEdgeNonMaxSuppressionKernel &operator=(NEEdgeNonMaxSuppressionKernel &&) = default; + /** Default destructor */ + ~NEEdgeNonMaxSuppressionKernel() = default; + + /** Initialise the kernel's sources, destination and border mode. + * + * @param[in] magnitude Source tensor - Magnitude. Data type supported: U16/U32. + * @param[in] phase Source tensor - Quantized phase. Data type supported: U8. + * @param[out] output Output tensor. Data type supported: U8. It will be filled with 0 for "no edge", 127 for "maybe", 255 for "edge" + * @param[in] upper_thr Upper threshold used for the hysteresis + * @param[in] lower_thr Lower threshold used for the hysteresis + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ITensor *magnitude, const ITensor *phase, ITensor *output, int32_t upper_thr, int32_t lower_thr, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + /** Common signature for all the specialised non-maxima suppression functions + * + * @param[in] magnitude_ptr Pointer to the first input tensor. + * @param[in] phase_ptr Pointer to the second input tensor.
+ * @param[out] output_ptr Pointer to the output tensor + * @param[in] stride_mag Stride of the magnitude tensor + * @param[in] upper_thr Upper threshold used for the hysteresis + * @param[in] lower_thr Lower threshold used for the hysteresis + */ + using EdgeNonMaxSupprFunction = void(const void *__restrict magnitude_ptr, const void *__restrict phase_ptr, void *__restrict output_ptr, const uint32_t stride_mag, const int32_t upper_thr, + const int32_t lower_thr); + + EdgeNonMaxSupprFunction *_func; /**< Non-Maxima suppression function to use for the particular tensor types passed to configure() */ + const ITensor *_magnitude; /**< Source tensor - Magnitude */ + const ITensor *_phase; /**< Source tensor - Quantized phase */ + ITensor *_output; /**< Destination tensor */ + int32_t _lower_thr; /**< Lower threshold used for the hysteresis */ + int32_t _upper_thr; /**< Upper threshold used for the hysteresis */ +}; + +/** NEON kernel to perform Edge tracing */ +class NEEdgeTraceKernel : public INEKernel +{ +public: + /** Default constructor */ + NEEdgeTraceKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEEdgeTraceKernel(const NEEdgeTraceKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEEdgeTraceKernel &operator=(const NEEdgeTraceKernel &) = delete; + /** Allow instances of this class to be moved */ + NEEdgeTraceKernel(NEEdgeTraceKernel &&) = default; + /** Allow instances of this class to be moved */ + NEEdgeTraceKernel &operator=(NEEdgeTraceKernel &&) = default; + /** Default destructor */ + ~NEEdgeTraceKernel() = default; + + /** Initialise the kernel's source and destination. + * + * @param[in,out] input Source tensor. Data type supported: U8. Must contain 0 for "no edge", 127 for "maybe", 255 for "edge" + * @param[in,out] output Destination tensor. Data type supported: U8. Must be initialized to 0 (No edge). + */ + void configure(ITensor *input, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + bool is_parallelisable() const override; + +private: + ITensor *_input; /**< Source tensor */ + ITensor *_output; /**< Destination tensor */ +}; +} +#endif /* __ARM_COMPUTE_NECANNYEDGEKERNEL_H__ */
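[Editor's note] Together with the Sobel/Scharr kernels declared elsewhere in this patch, the three classes above form the classic Canny pipeline. A hedged sketch of the stage order (in practice driven by the runtime's NECannyEdge function; border handling and intermediate allocation elided):

    // 1. Sobel/Scharr kernels          -> gx, gy gradients
    // 2. NEGradientKernel              -> magnitude + quantised phase
    // 3. NEEdgeNonMaxSuppressionKernel -> per-pixel 0 (no edge) / 127 (maybe) / 255 (edge)
    // 4. NEEdgeTraceKernel             -> hysteresis: resolves the 127 "maybe" pixels to 0 or 255

diff --git a/arm_compute/core/NEON/kernels/NEChannelCombineKernel.h b/arm_compute/core/NEON/kernels/NEChannelCombineKernel.h new file mode 100644 index 0000000000..8b669a4d28 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEChannelCombineKernel.h @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.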
diff --git a/arm_compute/core/NEON/kernels/NEChannelCombineKernel.h b/arm_compute/core/NEON/kernels/NEChannelCombineKernel.h
new file mode 100644
index 0000000000..8b669a4d28
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NEChannelCombineKernel.h
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NECHANNELCOMBINEKERNEL_H__
+#define __ARM_COMPUTE_NECHANNELCOMBINEKERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+
+#include <array>
+#include <cstdint>
+
+namespace arm_compute
+{
+class IMultiImage;
+class ITensor;
+using IImage = ITensor;
+
+/** Interface for the channel combine kernel */
+class NEChannelCombineKernel : public INEKernel
+{
+public:
+    /** Default constructor */
+    NEChannelCombineKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEChannelCombineKernel(const NEChannelCombineKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEChannelCombineKernel &operator=(const NEChannelCombineKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    NEChannelCombineKernel(NEChannelCombineKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    NEChannelCombineKernel &operator=(NEChannelCombineKernel &&) = default;
+    /** Default destructor */
+    ~NEChannelCombineKernel() = default;
+
+    /** Configure function's inputs and outputs.
+     *
+     * @param[in]  plane0 The 2D plane that forms channel 0. Data type supported: U8
+     * @param[in]  plane1 The 2D plane that forms channel 1. Data type supported: U8
+     * @param[in]  plane2 The 2D plane that forms channel 2. Data type supported: U8
+     * @param[in]  plane3 The 2D plane that forms channel 3. Data type supported: U8
+     * @param[out] output The single planar output tensor. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422
+     */
+    void configure(const ITensor *plane0, const ITensor *plane1, const ITensor *plane2, const ITensor *plane3, ITensor *output);
+    /** Configure function's inputs and outputs.
+     *
+     * @param[in]  plane0 The 2D plane that forms channel 0. Data type supported: U8
+     * @param[in]  plane1 The 2D plane that forms channel 1. Data type supported: U8
+     * @param[in]  plane2 The 2D plane that forms channel 2. Data type supported: U8
+     * @param[out] output The multi planar output tensor. Formats supported: NV12/NV21/IYUV/YUV444
+     */
+    void configure(const IImage *plane0, const IImage *plane1, const IImage *plane2, IMultiImage *output);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+    bool is_parallelisable() const override;
+
+private:
+    /** Combine 3 planes to form a three channel single plane tensor.
+     *
+     * @param[in] win Region on which to execute the kernel.
+     */
+    void combine_3C(const Window &win);
+    /** Combine 4 planes to form a four channel single plane tensor.
+     *
+     * @param[in] win Region on which to execute the kernel.
+     */
+    void combine_4C(const Window &win);
+    /** Combine 3 planes to form a single plane YUV tensor.
+     *
+     * @param[in] win Region on which to execute the kernel.
+     */
+    template <bool is_yuyv>
+    void combine_YUV_1p(const Window &win);
+    /** Combine 3 planes to form a two plane YUV tensor.
+     *
+     * @param[in] win Region on which to execute the kernel.
+     */
+    void combine_YUV_2p(const Window &win);
+    /** Combine 3 planes to form a three plane YUV tensor.
+     *
+     * @param[in] win Region on which to execute the kernel.
+     */
+    void combine_YUV_3p(const Window &win);
+    /** Copies a full plane to the output tensor.
+     *
+     * @param[in] win Region on which to execute the kernel.
+     */
+    void copy_plane(const Window &win, uint32_t plane_id);
+    /** Common signature for all the specialised ChannelCombine functions
+     *
+     * @param[in] window Region on which to execute the kernel.
+     */
+    using ChannelCombineFunction = void (NEChannelCombineKernel::*)(const Window &window);
+    /** ChannelCombine function to use for the particular tensor types passed to configure() */
+    ChannelCombineFunction _func;
+    std::array<const ITensor *, 4> _planes;
+    ITensor                       *_output;
+    IMultiImage                   *_output_multi;
+    std::array<uint32_t, 3>        _x_subsampling;
+    std::array<uint32_t, 3>        _y_subsampling;
+    unsigned int                   _num_elems_processed_per_iteration;
+    bool                           _is_parallelizable;
+};
+}
+#endif /* __ARM_COMPUTE_NECHANNELCOMBINEKERNEL_H__ */
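A hedged sketch of the two configure() overloads above, assuming the planes and output images are created and allocated beforehand; passing nullptr for plane3 with a three-channel destination format is an assumption of this example:

    using namespace arm_compute;

    // Three U8 planes combined into one interleaved RGB888 tensor (plane3 unused).
    NEChannelCombineKernel combine_rgb;
    combine_rgb.configure(&r_plane, &g_plane, &b_plane, nullptr, &rgb_image);

    // Three U8 planes combined into a multi-planar NV12 image.
    NEChannelCombineKernel combine_nv12;
    combine_nv12.configure(&y_plane, &u_plane, &v_plane, &nv12_image);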
diff --git a/arm_compute/core/NEON/kernels/NEChannelExtractKernel.h b/arm_compute/core/NEON/kernels/NEChannelExtractKernel.h
new file mode 100644
index 0000000000..0715e1f8cb
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NEChannelExtractKernel.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NECHANNELEXTRACTKERNEL_H__
+#define __ARM_COMPUTE_NECHANNELEXTRACTKERNEL_H__
+
+#include "arm_compute/core/NEON/INESimpleKernel.h"
+#include "arm_compute/core/Types.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class IMultiImage;
+class ITensor;
+using IImage = ITensor;
+
+/** Interface for the channel extract kernel */
+class NEChannelExtractKernel : public INESimpleKernel
+{
+public:
+    /** Default constructor */
+    NEChannelExtractKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEChannelExtractKernel(const NEChannelExtractKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEChannelExtractKernel &operator=(const NEChannelExtractKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    NEChannelExtractKernel(NEChannelExtractKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    NEChannelExtractKernel &operator=(NEChannelExtractKernel &&) = default;
+    /** Default destructor */
+    ~NEChannelExtractKernel() = default;
+
+    /** Set the input and output of the kernel
+     *
+     * @param[in]  input   Source tensor. Formats supported: RGB888/RGBA8888/YUYV422/UYVY422
+     * @param[in]  channel Channel to extract.
+     * @param[out] output  Destination tensor. Format supported: U8
+     */
+    void configure(const ITensor *input, Channel channel, ITensor *output);
+    /** Set the input and output of the kernel
+     *
+     * @param[in]  input   Multi-planar source image. Formats supported: NV12/NV21/IYUV/YUV444
+     * @param[in]  channel Channel to extract.
+     * @param[out] output  Single-planar destination image. Format supported: U8
+     */
+    void configure(const IMultiImage *input, Channel channel, IImage *output);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+
+private:
+    /** Extract one channel from a two channel planar tensor.
+     *
+     * @param[in] win Region on which to execute the kernel.
+     */
+    void extract_1C_from_2C_img(const Window &win);
+    /** Extract one channel from a three channel planar tensor.
+     *
+     * @param[in] win Region on which to execute the kernel.
+     */
+    void extract_1C_from_3C_img(const Window &win);
+    /** Extract one channel from a four channel planar tensor.
+     *
+     * @param[in] win Region on which to execute the kernel.
+     */
+    void extract_1C_from_4C_img(const Window &win);
+    /** Extract U/V channel from a single planar YUYV/UYVY tensor.
+     *
+     * @param[in] win Region on which to execute the kernel.
+     */
+    void extract_YUYV_uv(const Window &win);
+    /** Copies a full plane to the output tensor.
+     *
+     * @param[in] win Region on which to execute the kernel.
+     */
+    void copy_plane(const Window &win);
+    /** Common signature for all the specialised ChannelExtract functions
+     *
+     * @param[in] window Region on which to execute the kernel.
+     */
+    using ChannelExtractFunction = void (NEChannelExtractKernel::*)(const Window &window);
+    /** ChannelExtract function to use for the particular tensor types passed to configure() */
+    ChannelExtractFunction _func;
+    unsigned int           _lut_index;
+};
+}
+#endif /* __ARM_COMPUTE_NECHANNELEXTRACTKERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NECol2ImKernel.h b/arm_compute/core/NEON/kernels/NECol2ImKernel.h
new file mode 100644
index 0000000000..f6bc2152da
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NECol2ImKernel.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NECOL2IMKERNEL_H__
+#define __ARM_COMPUTE_NECOL2IMKERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** NEON kernel to perform col2im reshaping.
+ *
+ * Rearranges each matrix column into image blocks.
+ * It's the inverse operation of @ref NEIm2ColKernel.
+ *
+ * For example, a vector of 9 elements can be reshaped to a block (image) of 3x3:
+ *
+ * @f[
+ * \left( \begin{array}{ccccccccc}
+ * a0 & a1 & a2 & a3 & a4 & a5 & a6 & a7 & a8 \\
+ * \end{array} \right)
+ * \rightarrow
+ * \left( \begin{array}{ccc}
+ * a0 & a1 & a2 \\
+ * a3 & a4 & a5 \\
+ * a6 & a7 & a8 \\
+ * \end{array} \right)
+ * @f]
+ */
+class NECol2ImKernel : public INEKernel
+{
+public:
+    /** Default constructor */
+    NECol2ImKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NECol2ImKernel(const NECol2ImKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NECol2ImKernel &operator=(const NECol2ImKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    NECol2ImKernel(NECol2ImKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    NECol2ImKernel &operator=(NECol2ImKernel &&) = default;
+    /** Default destructor */
+    ~NECol2ImKernel() = default;
+
+    /** Set the input and output of the kernel.
+     *
+     * @param[in]  input          The input tensor to convert. Data types supported: U8/S8/QS8/U16/S16/QS16/F16/U32/S32/F32
+     * @param[out] output         The output tensor. 3 lower dimensions represent a single output [width, height, OFM],
+     *                            while the rest represent batch of outputs. Data types supported: Same as @p input
+     * @param[in]  convolved_dims Output convolved dimensions.
+     */
+    void configure(const ITensor *input, ITensor *output, std::pair<unsigned int, unsigned int> convolved_dims);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+
+private:
+    /** Template function to run the col2im
+     *
+     * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
+     */
+    template <typename T>
+    void run_col2im(const Window &window);
+
+    /** Common signature for all the specialised col2im functions
+     *
+     * @param[in] window Region on which to execute the kernel.
+     */
+    using Col2ImFunctionPtr = void (NECol2ImKernel::*)(const Window &window);
+
+    Col2ImFunctionPtr _func;
+    const ITensor    *_input;
+    ITensor          *_output;
+    std::pair<unsigned int, unsigned int> _convolved_dims;
+};
+}
+
+#endif /*__ARM_COMPUTE_NECOL2IMKERNEL_H__ */
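The reshape documented above is a plain index remapping; a standalone scalar sketch, independent of the library and written for illustration only:

    #include <cstddef>

    // Standalone model of the col2im remapping: column c of a row-major
    // (width*height) x ofm matrix becomes one width x height plane of the
    // output image (matching the 9-element -> 3x3 example for ofm == 1).
    void col2im(const float *matrix, float *image, size_t width, size_t height, size_t ofm)
    {
        for(size_t c = 0; c < ofm; ++c)
        {
            for(size_t i = 0; i < width * height; ++i)
            {
                image[c * width * height + i] = matrix[i * ofm + c];
            }
        }
    }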
diff --git a/arm_compute/core/NEON/kernels/NEColorConvertKernel.h b/arm_compute/core/NEON/kernels/NEColorConvertKernel.h
new file mode 100644
index 0000000000..2297218117
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NEColorConvertKernel.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NECOLORCONVERTKERNEL_H__
+#define __ARM_COMPUTE_NECOLORCONVERTKERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class IMultiImage;
+class ITensor;
+using IImage = ITensor;
+
+/** Interface for the color convert kernel */
+class NEColorConvertKernel : public INEKernel
+{
+public:
+    /** Default constructor */
+    NEColorConvertKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEColorConvertKernel(const NEColorConvertKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEColorConvertKernel &operator=(const NEColorConvertKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    NEColorConvertKernel(NEColorConvertKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    NEColorConvertKernel &operator=(NEColorConvertKernel &&) = default;
+    /** Default destructor */
+    ~NEColorConvertKernel() = default;
+
+    /** Set the input and output of the kernel
+     *
+     * @param[in]  input  Source tensor. Formats supported: RGBA8888/UYVY422/YUYV422/RGB888
+     * @param[out] output Destination tensor. Formats supported: RGB888 (if the formats of @p input are RGBA8888/UYVY422/YUYV422),
+     *                    RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888)
+     */
+    void configure(const ITensor *input, ITensor *output);
+    /** Set the input and output of the kernel
+     *
+     * @param[in]  input  Multi-planar source image. Formats supported: NV12/NV21/IYUV
+     * @param[out] output Single-planar destination image. Formats supported: RGB888/RGBA8888
+     */
+    void configure(const IMultiImage *input, IImage *output);
+    /** Set the input and output of the kernel
+     *
+     * @param[in]  input  Single-planar source image. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422
+     * @param[out] output Multi-planar destination image. Formats supported: NV12/IYUV/YUV444 (if the formats of @p input are RGB888/RGBA8888)
+     */
+    void configure(const IImage *input, IMultiImage *output);
+    /** Set the input and output of the kernel
+     *
+     * @param[in]  input  Multi-planar source image. Formats supported: NV12/NV21/IYUV
+     * @param[out] output Multi-planar destination image. Formats supported: YUV444/IYUV (if the formats of @p input are NV12/NV21)/NV12 (if the format of @p input is IYUV)
+     */
+    void configure(const IMultiImage *input, IMultiImage *output);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+
+private:
+    using ColorConvertFunction = void(const void *__restrict input_ptr, void *__restrict output_ptr, const Window &win);
+    const void *_input;
+    void       *_output;
+    ColorConvertFunction *_func;
+};
+}
+#endif /*__ARM_COMPUTE_NECOLORCONVERTKERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NEConvolutionKernel.h b/arm_compute/core/NEON/kernels/NEConvolutionKernel.h
new file mode 100644
index 0000000000..588a228a5d
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NEConvolutionKernel.h
@@ -0,0 +1,251 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NECONVOLUTIONKERNEL_H__
+#define __ARM_COMPUTE_NECONVOLUTIONKERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+#include "arm_compute/core/NEON/INESimpleKernel.h"
+
+#include <array>
+#include <cstdint>
+#include <vector>
+
+namespace arm_compute
+{
+class ITensor;
+
+/****************************************************************************************\
+ *                                    Square Convolution                                *
+\****************************************************************************************/
+
+/** Interface for the kernel to run an arbitrary size convolution on a tensor. (Currently supports 3x3, 5x5, 7x7 and 9x9).
+ * The client can supply a convolution matrix \f$ C_{m,n} \f$.
+ * @f{eqnarray}{
+ *  k_0 &=& \frac{m}{2} \\
+ *  l_0 &=& \frac{n}{2} \\
+ *  sum &=& \sum_{k=0,l=0}^{k=m-1,l=n-1} input(x+k-k_0, y+l-l_0) C_{k,l}
+ * @f}
+ *
+ * @note The above equation for this function is similar to the default OpenCV Filter2D function,
+ *       which actually computes a correlation and not a convolution.
+ *       In case of a real convolution the convolution matrix should be flipped both horizontally and vertically.
+ */
+template <unsigned int matrix_size>
+class NEConvolutionKernel : public INESimpleKernel
+{
+public:
+    /** Default constructor */
+    NEConvolutionKernel();
+    /** Initialise the kernel's input, output and border mode.
+     *
+     * @param[in]  input            Source tensor. Data type supported: U8.
+     * @param[out] output           Destination tensor. Data types supported: U8, S16.
+     * @param[in]  conv             Convolution matrix to apply to the input tensor.
+     * @param[in]  scale            Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(const ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, bool border_undefined);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+    BorderSize border_size() const override;
+
+private:
+    template <typename OutputType>
+    void convolution(const Window &win);
+
+protected:
+    uint32_t _scale;                                             /**< scale of the convolution */
+    std::array<int16_t, matrix_size *matrix_size> _convolution; /**< convolution matrix */
+};
+
+/** Interface for the kernel which applies a 3x3 convolution to a tensor.*/
+using NEConvolution3x3Kernel = NEConvolutionKernel<3>;
+/** Interface for the kernel which applies a 5x5 convolution to a tensor.*/
+using NEConvolution5x5Kernel = NEConvolutionKernel<5>;
+/** Interface for the kernel which applies a 7x7 convolution to a tensor.*/
+using NEConvolution7x7Kernel = NEConvolutionKernel<7>;
+/** Interface for the kernel which applies a 9x9 convolution to a tensor.*/
+using NEConvolution9x9Kernel = NEConvolutionKernel<9>;
+
+/****************************************************************************************\
+ *                              Separable Square Convolution                            *
+\****************************************************************************************/
+
+/** Kernel for the Horizontal pass of a Separable Convolution */
+template <unsigned int matrix_size>
+class NESeparableConvolutionHorKernel : public INESimpleKernel
+{
+public:
+    /** Default constructor */
+    NESeparableConvolutionHorKernel();
+
+    /** Initialise the kernel's input, output and border mode.
+     *
+     * @param[in]  input            Source tensor. Data type supported: U8.
+     * @param[out] output           Destination tensor. Data types supported: U16, S16, S32.
+     * @param[in]  conv_row         Convolution matrix to apply to the input tensor.
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(const ITensor *input, ITensor *output, const int16_t *conv_row, bool border_undefined);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+    BorderSize border_size() const override;
+
+private:
+    /** Apply the object's convolution to the given window of the input tensor.
+     *
+     * @param[in] window Window to apply the convolution on.
+     */
+    template <typename OutputType>
+    void convolve(const Window &window);
+
+    std::array<int16_t, matrix_size> _conv_row; /**< Convolution coefficients */
+    BorderSize _border_size;                    /**< Border size */
+};
+
+/** Interface for the kernel which applies a 5x1 horizontal convolution to a tensor.*/
+using NESeparableConvolution5x5HorKernel = NESeparableConvolutionHorKernel<5>;
+/** Interface for the kernel which applies a 7x1 horizontal convolution to a tensor.*/
+using NESeparableConvolution7x7HorKernel = NESeparableConvolutionHorKernel<7>;
+/** Interface for the kernel which applies a 9x1 horizontal convolution to a tensor.*/
+using NESeparableConvolution9x9HorKernel = NESeparableConvolutionHorKernel<9>;
+
+/** Kernel for the Vertical pass of a Separable Convolution */
+template <unsigned int matrix_size>
+class NESeparableConvolutionVertKernel : public INESimpleKernel
+{
+public:
+    /** Default constructor */
+    NESeparableConvolutionVertKernel();
+
+    /** Initialise the kernel's input, output and border mode.
+     *
+     * @param[in]  input            Source tensor. Data type supported: U16, S16, S32.
+     * @param[out] output           Destination tensor. Data types supported: U8, S16.
+     * @param[in]  conv_col         Convolution matrix to apply to the input tensor.
+     * @param[in]  scale            Scale of the convolution matrix
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(const ITensor *input, ITensor *output, const int16_t *conv_col, uint32_t scale, bool border_undefined);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+    BorderSize border_size() const override;
+
+private:
+    /** Apply the object's convolution to the given window of the input tensor.
+     *  This function is used if the intermediate values have been stored as U16.
+     *
+     * @param[in] win Window to apply the convolution on.
+     */
+    template <typename OutputType>
+    void convolution_u16(const Window &win);
+    /** Apply the object's convolution to the given window of the input tensor.
+     *  This function is used if the intermediate values have been stored as S16.
+     *
+     * @param[in] win Window to apply the convolution on.
+     */
+    template <typename OutputType>
+    void convolution_s16(const Window &win);
+    /** Apply the object's convolution to the given window of the input tensor.
+     *  This function is used if the intermediate values have been stored as S32.
+     *
+     * @param[in] win Window to apply the convolution on.
+     */
+    template <typename OutputType>
+    void convolution_s32(const Window &win);
+
+    std::array<int16_t, matrix_size> _conv_col; /**< Convolution coefficients */
+    uint32_t _scale;                            /**< Convolution's scale */
+};
+
+/** Interface for the kernel which applies a 1x5 vertical convolution to a tensor.*/
+using NESeparableConvolution5x5VertKernel = NESeparableConvolutionVertKernel<5>;
+/** Interface for the kernel which applies a 1x7 vertical convolution to a tensor.*/
+using NESeparableConvolution7x7VertKernel = NESeparableConvolutionVertKernel<7>;
+/** Interface for the kernel which applies a 1x9 vertical convolution to a tensor.*/
+using NESeparableConvolution9x9VertKernel = NESeparableConvolutionVertKernel<9>;
+
+/****************************************************************************************\
+ *                                 Rectangle Convolution                                *
+\****************************************************************************************/
+
+/** Kernel for running a convolution on a rectangle matrix.
+ *
+ * @note Supports combinations of 3, 5, 7 and 9.
+ */
+class NEConvolutionRectangleKernel : public INEKernel
+{
+public:
+    /** Default constructor */
+    NEConvolutionRectangleKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEConvolutionRectangleKernel(NEConvolutionRectangleKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEConvolutionRectangleKernel &operator=(NEConvolutionRectangleKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    NEConvolutionRectangleKernel(NEConvolutionRectangleKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    NEConvolutionRectangleKernel &operator=(NEConvolutionRectangleKernel &&) = default;
+    /** Initialise the kernel's input, output and border mode.
+     *
+     * @param[in]  input            Source tensor. Data type supported: U8.
+     * @param[out] output           Destination tensor. Data types supported: U8, S16.
+     * @param[in]  conv             Convolution matrix to apply to the input tensor.
+     * @param[in]  width            Width of convolution matrix (Number of columns)
+     * @param[in]  height           Height of convolution matrix (Number of rows)
+     * @param[in]  scale            Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(const ITensor *input, ITensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+    BorderSize border_size() const override;
+
+private:
+    unsigned int get_index(uint32_t val);
+    /** Apply the object's convolution to the given window of the input tensor.
+     *
+     * @param[in] win Window to apply the convolution on.
+     */
+    template <typename OutputType, unsigned int rows, unsigned int cols>
+    void convolution(const Window &win);
+
+protected:
+    const ITensor *_input;             /**< Input tensor */
+    ITensor       *_output;            /**< Output tensor */
+    uint32_t       _scale;             /**< Scale of the convolution */
+    std::vector<int16_t> _convolution; /**< Convolution matrix */
+    BorderSize _border_size;           /**< Calculated border width */
+    uint32_t   _func_idx;              /**< Index used to specify convolution function to be used */
+    const static unsigned int _nr_supported_sizes
+    {
+        4
+    }; /**< Number of supported permutations */
+};
+}
+#endif /*__ARM_COMPUTE_NECONVOLUTIONKERNEL_H__ */
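A standalone scalar model of the square convolution formula and the scale convention documented above (scale == 0 means "sum of the coefficients, or 1 if they cancel"); border handling is elided and, as the doxygen notes, the accumulation is correlation-style:

    #include <cstdint>
    #include <numeric>
    #include <vector>

    int16_t convolve_pixel(const uint8_t *src, int stride, int x, int y,
                           const std::vector<int16_t> &conv, int size, uint32_t scale)
    {
        int32_t s = static_cast<int32_t>(scale);
        if(s == 0) // convention from the doxygen above
        {
            s = std::accumulate(conv.begin(), conv.end(), 0);
            if(s == 0)
            {
                s = 1;
            }
        }
        int32_t   sum  = 0;
        const int half = size / 2;
        for(int k = 0; k < size; ++k)
        {
            for(int l = 0; l < size; ++l)
            {
                sum += src[(y + k - half) * stride + (x + l - half)] * conv[k * size + l];
            }
        }
        return static_cast<int16_t>(sum / s);
    }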
diff --git a/arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h b/arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h
new file mode 100644
index 0000000000..67b8c6052d
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NECUMULATIVEDISTRIBUTIONKERNEL_H__
+#define __ARM_COMPUTE_NECUMULATIVEDISTRIBUTIONKERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class IDistribution1D;
+class ILut;
+class ITensor;
+using IImage = ITensor;
+
+/** Interface for the cumulative distribution (cumulative summation) calculation kernel.
+ *
+ * This kernel calculates the cumulative sum of a given distribution (meaning that each output element
+ * is the sum of all its previous elements including itself) and creates a lookup table with the normalized
+ * pixel intensities which is used to improve the contrast of the image.
+ */
+class NECumulativeDistributionKernel : public INEKernel
+{
+public:
+    /** Default constructor */
+    NECumulativeDistributionKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NECumulativeDistributionKernel(const NECumulativeDistributionKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NECumulativeDistributionKernel &operator=(const NECumulativeDistributionKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    NECumulativeDistributionKernel(NECumulativeDistributionKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    NECumulativeDistributionKernel &operator=(NECumulativeDistributionKernel &&) = default;
+    /** Set the input and output distribution.
+     *
+     * @param[in]  input          Input image. Data type supported: U8
+     * @param[in]  distribution   Unnormalized 256-bin distribution of the input image.
+     * @param[out] cumulative_sum Cumulative distribution (summed histogram). Should be same size as @p distribution.
+     * @param[out] output         Equalization lookup table. Should consist of 256 entries of U8 elements.
+     */
+    void configure(const IImage *input, const IDistribution1D *distribution, IDistribution1D *cumulative_sum, ILut *output);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+    bool is_parallelisable() const override;
+
+private:
+    const IImage          *_input;          /**< Input image. */
+    const IDistribution1D *_distribution;   /**< Input histogram of the input image. */
+    IDistribution1D       *_cumulative_sum; /**< The cumulative distribution. */
+    ILut                  *_output;         /**< Output with the equalization lookup table. */
+private:
+    static const uint32_t _histogram_size = 256; /**< Default histogram size of 256. */
+};
+}
+
+#endif /*__ARM_COMPUTE_NECUMULATIVEDISTRIBUTIONKERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NEDepthConcatenateKernel.h b/arm_compute/core/NEON/kernels/NEDepthConcatenateKernel.h
new file mode 100644
index 0000000000..7384cd1f02
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NEDepthConcatenateKernel.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_NEDEPTHCONCATENATEKERNEL_H__
+#define __ARM_COMPUTE_NEDEPTHCONCATENATEKERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the depth concatenate kernel.
+ *  The input tensor will be concatenated into the output tensor.
+ */
+class NEDepthConcatenateKernel : public INEKernel
+{
+public:
+    /** Default constructor */
+    NEDepthConcatenateKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEDepthConcatenateKernel(const NEDepthConcatenateKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEDepthConcatenateKernel &operator=(const NEDepthConcatenateKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    NEDepthConcatenateKernel(NEDepthConcatenateKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    NEDepthConcatenateKernel &operator=(NEDepthConcatenateKernel &&) = default;
+    /** Default destructor */
+    ~NEDepthConcatenateKernel() = default;
+    /** Initialise the kernel's inputs and output
+     *
+     * @param[in]     input        Input tensor. Data types supported: F32.
+     * @param[in]     depth_offset The offset on the Z axis.
+     * @param[in,out] output       Output tensor. Data types supported: F32.
+     *
+     * @note The output tensor's two lowest dimensions can't be smaller than the input's.
+     * @note The gaps between the two lowest dimensions of input and output need to be divisible by 2.
+     *
+     */
+    void configure(const ITensor *input, unsigned int depth_offset, ITensor *output);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+    BorderSize border_size() const override;
+
+private:
+    const ITensor *_input;
+    ITensor       *_output;
+    int            _top_bottom;
+    int            _left_right;
+    unsigned int   _depth_offset;
+};
+}
+#endif /* __ARM_COMPUTE_NEDEPTHCONCATENATEKERNEL_H__ */
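A hedged sketch of stacking two F32 tensors along Z with the kernel above; one kernel instance is configured per input, and the shapes are assumed to satisfy the notes in the doxygen:

    using namespace arm_compute;

    // input0 occupies depths [0, d0), input1 occupies [d0, d0 + d1).
    NEDepthConcatenateKernel concat0;
    NEDepthConcatenateKernel concat1;
    concat0.configure(&input0, 0 /* depth_offset */, &output);
    concat1.configure(&input1, input0.info()->dimension(2) /* depth_offset */, &output);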
diff --git a/arm_compute/core/NEON/kernels/NEDepthConvertKernel.h b/arm_compute/core/NEON/kernels/NEDepthConvertKernel.h
new file mode 100644
index 0000000000..0c5c29e4db
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NEDepthConvertKernel.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEDEPTHCONVERTKERNEL_H__
+#define __ARM_COMPUTE_NEDEPTHCONVERTKERNEL_H__
+
+#include "arm_compute/core/NEON/INESimpleKernel.h"
+#include "arm_compute/core/Types.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Depth conversion kernel */
+class NEDepthConvertKernel : public INESimpleKernel
+{
+public:
+    /** Default constructor */
+    NEDepthConvertKernel();
+    /** Set the input and output of the kernel
+     *
+     * Valid conversions Input -> Output:
+     *
+     *   - QS8 -> F32
+     *   - U8  -> U16, S16, S32
+     *   - U16 -> U8, U32
+     *   - S16 -> U8, S32
+     *   - F32 -> QS8
+     *
+     * @param[in]  input  The input tensor to convert. Data types supported: U8/QS8/U16/S16/F32.
+     * @param[out] output The output tensor. Data types supported: U8/QS8/U16/S16/U32/S32/F32.
+     * @param[in]  policy Conversion policy.
+     * @param[in]  shift  Value for down/up conversions. Must be 0 <= shift < 8.
+     */
+    void configure(const ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+
+private:
+    ConvertPolicy _policy;
+    uint32_t      _shift;
+};
+}
+#endif /*__ARM_COMPUTE_NEDEPTHCONVERTKERNEL_H__ */
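A minimal configure() sketch for one of the valid conversions listed above (U8 -> S16), assuming both tensors are initialised and allocated elsewhere; the policy only takes effect on narrowing conversions:

    using namespace arm_compute;

    NEDepthConvertKernel convert;
    convert.configure(&src_u8, &dst_s16, ConvertPolicy::SATURATE, 1 /* shift: up-convert with << 1 */);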
diff --git a/arm_compute/core/NEON/kernels/NEDerivativeKernel.h b/arm_compute/core/NEON/kernels/NEDerivativeKernel.h
new file mode 100644
index 0000000000..abb8a894c0
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NEDerivativeKernel.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEDERIVATIVEKERNEL_H__
+#define __ARM_COMPUTE_NEDERIVATIVEKERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the kernel to run the derivative along the X/Y directions on a tensor.
+ *
+ */
+class NEDerivativeKernel : public INEKernel
+{
+public:
+    /** Default constructor */
+    NEDerivativeKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEDerivativeKernel(const NEDerivativeKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEDerivativeKernel &operator=(const NEDerivativeKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    NEDerivativeKernel(NEDerivativeKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    NEDerivativeKernel &operator=(NEDerivativeKernel &&) = default;
+    /** Initialise the kernel's sources, destination and border
+     *
+     * @note At least one of output_x or output_y must be set
+     *
+     * @param[in]  input            Source tensor. Data type supported: U8.
+     * @param[out] output_x         (Optional) Destination tensor for the X gradient. Data type supported: S16.
+     * @param[out] output_y         (Optional) Destination tensor for the Y gradient. Data type supported: S16.
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+    BorderSize border_size() const override;
+
+private:
+    /** Function to perform derivative along the X direction on the given window
+     *
+     * @param[in] window Region on which to execute the kernel
+     */
+    void derivative_x(const Window &window);
+    /** Function to perform derivative along the Y direction on the given window
+     *
+     * @param[in] window Region on which to execute the kernel
+     */
+    void derivative_y(const Window &window);
+    /** Function to perform derivative along the X and Y direction on the given window
+     *
+     * @param[in] window Region on which to execute the kernel
+     */
+    void derivative_xy(const Window &window);
+    /** Common signature for all the specialised derivative functions
+     *
+     * @param[in] window Region on which to execute the kernel.
+     */
+    using DerivativeFunction = void (NEDerivativeKernel::*)(const Window &window);
+    /** Derivative function to use for the particular tensor types passed to configure() */
+    DerivativeFunction _func;
+
+private:
+    const ITensor *_input;    /**< Input tensor */
+    ITensor       *_output_x; /**< Output tensor - Derivative along the X direction */
+    ITensor       *_output_y; /**< Output tensor - Derivative along the Y direction */
+};
+}
+#endif /* __ARM_COMPUTE_NEDERIVATIVEKERNEL_H__ */
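A minimal configure() sketch for the derivative kernel above, assuming the tensors are allocated elsewhere; passing nullptr for one of the outputs is also valid since only one of them is required:

    using namespace arm_compute;

    NEDerivativeKernel derivative;
    derivative.configure(&src_u8, &grad_x_s16, &grad_y_s16, false /* border_undefined */);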
diff --git a/arm_compute/core/NEON/kernels/NEDilateKernel.h b/arm_compute/core/NEON/kernels/NEDilateKernel.h
new file mode 100644
index 0000000000..05f148a1fd
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NEDilateKernel.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEDILATEKERNEL_H__
+#define __ARM_COMPUTE_NEDILATEKERNEL_H__
+
+#include "arm_compute/core/NEON/INESimpleKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the kernel to perform boolean image dilation */
+class NEDilateKernel : public INESimpleKernel
+{
+public:
+    /** Set the source, destination and border mode of the kernel
+     *
+     * @param[in]  input            Source tensor. Data type supported: U8
+     * @param[out] output           Destination tensor. Data type supported: U8
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(const ITensor *input, ITensor *output, bool border_undefined);
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+    BorderSize border_size() const override;
+};
+}
+#endif /*__ARM_COMPUTE_NEDILATEKERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h
new file mode 100644
index 0000000000..f098e18655
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERBIASACCUMULATEKERNEL_H__
+#define __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERBIASACCUMULATEKERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+/** NEON kernel to accumulate the biases to each element of the input tensor
+ *
+ * @note We assume bias to be shared
+ */
+class NEDirectConvolutionLayerBiasAccumulateKernel : public INEKernel
+{
+public:
+    /** Default constructor */
+    NEDirectConvolutionLayerBiasAccumulateKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEDirectConvolutionLayerBiasAccumulateKernel(const NEDirectConvolutionLayerBiasAccumulateKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEDirectConvolutionLayerBiasAccumulateKernel &operator=(const NEDirectConvolutionLayerBiasAccumulateKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    NEDirectConvolutionLayerBiasAccumulateKernel(NEDirectConvolutionLayerBiasAccumulateKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    NEDirectConvolutionLayerBiasAccumulateKernel &operator=(NEDirectConvolutionLayerBiasAccumulateKernel &&) = default;
+    /** Default destructor */
+    ~NEDirectConvolutionLayerBiasAccumulateKernel() = default;
+    /** Set the accumulate buffer and the biases of the kernel.
+     *
+     * @param[in, out] input  Input to add the bias to. If @p output is not specified then accumulation is done in-place.
+     *                        Data type supported: QS8/F32
+     * @param[in]      bias   The shared bias tensor to add. It must be a 1D tensor. Data type supported: Same as @p input
+     * @param[out]     output (Optional) If the output tensor is specified the accumulation is done out-of-place. (Defaults to nullptr)
+     *                        Data type supported: Same as @p input
+     */
+    void configure(ITensor *input, const ITensor *bias, ITensor *output = nullptr);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+
+private:
+    using BiasAccumulateKernel = void(ITensor *input, const ITensor *bias, const Window window, ITensor *output);
+
+private:
+    BiasAccumulateKernel *_func;
+    ITensor              *_input;
+    const ITensor        *_bias;
+    ITensor              *_output;
+};
+}
+#endif /*__ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERBIASACCUMULATEKERNEL_H__ */
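A hedged sketch of the in-place form of the configure() call documented above, assuming the convolution result and the 1D bias tensor already exist:

    using namespace arm_compute;

    NEDirectConvolutionLayerBiasAccumulateKernel bias_accumulate;
    bias_accumulate.configure(&conv_out_f32, &bias_f32); // output omitted -> accumulate in-place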
diff --git a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h
new file mode 100644
index 0000000000..d726071606
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERKERNEL_H__
+#define __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERKERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** NEON interface for Direct Convolution Layer kernel */
+class NEDirectConvolutionLayerKernel : public INEKernel
+{
+public:
+    /** Default constructor */
+    NEDirectConvolutionLayerKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEDirectConvolutionLayerKernel(const NEDirectConvolutionLayerKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEDirectConvolutionLayerKernel &operator=(const NEDirectConvolutionLayerKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    NEDirectConvolutionLayerKernel(NEDirectConvolutionLayerKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    NEDirectConvolutionLayerKernel &operator=(NEDirectConvolutionLayerKernel &&) = default;
+    /** Default destructor */
+    ~NEDirectConvolutionLayerKernel() = default;
+    /** Set the input, weights and output tensors.
+     *
+     * @param[in]  input     Input tensor. Data types supported: QS8/F32.
+     * @param[in]  weights   Set of kernels to convolve the input volume.
+     *                       The 3rd dimension must be the same as the input's volume 3rd dimension.
+     *                       Data type supported: Same as @p input.
+     * @param[out] output    Output tensor.
+     *                       The 3rd dimension must be equal to the 4th dimension of the @p weights tensor. Data types supported: Same as @p input.
+     * @param[in]  conv_info Contains padding and stride information described in @ref PadStrideInfo.
+     */
+    void configure(const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+    BorderSize border_size() const override;
+
+private:
+    const ITensor *_input;
+    const ITensor *_weights;
+    ITensor       *_output;
+    PadStrideInfo  _conv_info;
+    BorderSize     _border_size;
+    unsigned int   _kernel_size;
+    unsigned int   _num_elems_read_per_iteration;
+    unsigned int   _num_elems_written_per_iteration;
+};
+}
+#endif /*__ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERKERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NEErodeKernel.h b/arm_compute/core/NEON/kernels/NEErodeKernel.h
new file mode 100644
index 0000000000..86dc217cc0
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NEErodeKernel.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEERODEKERNEL_H__
+#define __ARM_COMPUTE_NEERODEKERNEL_H__
+
+#include "arm_compute/core/NEON/INESimpleKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the kernel to perform boolean image erosion */
+class NEErodeKernel : public INESimpleKernel
+{
+public:
+    /** Set the source, destination and border mode of the kernel
+     *
+     * @param[in]  input            Source tensor. Data type supported: U8
+     * @param[out] output           Destination tensor. Data type supported: U8
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(const ITensor *input, ITensor *output, bool border_undefined);
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+    BorderSize border_size() const override;
+};
+}
+#endif /*__ARM_COMPUTE_NEERODEKERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NEFastCornersKernel.h b/arm_compute/core/NEON/kernels/NEFastCornersKernel.h
new file mode 100644
index 0000000000..d9bd6acde9
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NEFastCornersKernel.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEFASTCORNERSKERNEL_H__
+#define __ARM_COMPUTE_NEFASTCORNERSKERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+#include "arm_compute/core/Types.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ITensor;
+using IImage = ITensor;
+
+/** NEON kernel to perform fast corners */
+class NEFastCornersKernel : public INEKernel
+{
+public:
+    /** Constructor */
+    NEFastCornersKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEFastCornersKernel(const NEFastCornersKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEFastCornersKernel &operator=(const NEFastCornersKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    NEFastCornersKernel(NEFastCornersKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    NEFastCornersKernel &operator=(NEFastCornersKernel &&) = default;
+    /** Initialise the kernel.
+     *
+     * @param[in]  input               Source image. Data type supported: U8.
+     * @param[out] output              Output image. Data type supported: U8.
+     * @param[in]  threshold           Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3.
+     * @param[in]  non_max_suppression True if non-maxima suppression is applied, false otherwise.
+     * @param[in]  border_undefined    True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(const IImage *input, IImage *output, uint8_t threshold, bool non_max_suppression, bool border_undefined);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+    BorderSize border_size() const override;
+
+private:
+    const IImage *_input;              /**< source image */
+    IImage       *_output;             /**< intermediate results */
+    uint8_t       _threshold;          /**< threshold on difference between intensity */
+    bool          _non_max_suppression; /**< True if non-maxima suppression is applied in the next stage */
+};
+}
+#endif
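A minimal configure() sketch for the FAST corners kernel above, assuming the images are allocated elsewhere; the threshold value is an illustrative assumption:

    using namespace arm_compute;

    NEFastCornersKernel fast_corners;
    fast_corners.configure(&src_u8, &corner_response_u8, 20 /* threshold */,
                           true /* non_max_suppression */, true /* border_undefined */);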
diff --git a/arm_compute/core/NEON/kernels/NEFillArrayKernel.h b/arm_compute/core/NEON/kernels/NEFillArrayKernel.h
new file mode 100644
index 0000000000..8e0846ea88
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NEFillArrayKernel.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEFILLARRAYKERNEL_H__
+#define __ARM_COMPUTE_NEFILLARRAYKERNEL_H__
+
+#include "arm_compute/core/IArray.h"
+#include "arm_compute/core/NEON/INEKernel.h"
+#include "arm_compute/core/Types.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ITensor;
+using IImage = ITensor;
+
+/** This kernel adds all texels greater than or equal to the threshold value to the keypoint array. */
+class NEFillArrayKernel : public INEKernel
+{
+public:
+    /** Default constructor */
+    NEFillArrayKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEFillArrayKernel(const NEFillArrayKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEFillArrayKernel &operator=(const NEFillArrayKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    NEFillArrayKernel(NEFillArrayKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    NEFillArrayKernel &operator=(NEFillArrayKernel &&) = default;
+    /** Default destructor */
+    ~NEFillArrayKernel() = default;
+
+    /** Initialise the kernel.
+     *
+     * @param[in]  input     Source image. Data type supported: U8.
+     * @param[in]  threshold Texels greater than or equal to the threshold will be added to the array.
+     * @param[out] output    Array of keypoints used to store the results.
+     */
+    void configure(const IImage *input, uint8_t threshold, IKeyPointArray *output);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+    bool is_parallelisable() const override;
+
+private:
+    const IImage   *_input;
+    IKeyPointArray *_output;
+    uint8_t         _threshold;
+};
+}
+#endif
diff --git a/arm_compute/core/NEON/kernels/NEFillBorderKernel.h b/arm_compute/core/NEON/kernels/NEFillBorderKernel.h
new file mode 100644
index 0000000000..3ec66115e2
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NEFillBorderKernel.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEFILLBORDERKERNEL_H__
+#define __ARM_COMPUTE_NEFILLBORDERKERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the kernel to fill borders */
+class NEFillBorderKernel : public INEKernel
+{
+public:
+    /** Default Constructor */
+    NEFillBorderKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEFillBorderKernel(const NEFillBorderKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEFillBorderKernel &operator=(const NEFillBorderKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    NEFillBorderKernel(NEFillBorderKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    NEFillBorderKernel &operator=(NEFillBorderKernel &&) = default;
+    /** Default destructor */
+    ~NEFillBorderKernel() = default;
+
+    /** Initialise the function.
+     *
+     * @note This kernel fills the borders within the XY-planes.
+     *
+     * @param[in,out] tensor                Tensor to process. Data types supported: U8/S8/QS8/QS16/S16/S32/F32.
+     * @param[in]     border_size           Size of the border to fill in elements.
+     * @param[in]     border_mode           Border mode to use when filling the border.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     *
+     */
+    void configure(ITensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue());
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+
+private:
+    template <typename T>
+    void fill_replicate_single_channel(const Window &window);
+    template <typename T>
+    void fill_constant_value_single_channel(const Window &window);
+
+    ITensor   *_tensor;
+    BorderSize _border_size;
+    BorderMode _mode;
+    PixelValue _constant_border_value;
+};
+}
+#endif /*__ARM_COMPUTE_NEFILLBORDERKERNEL_H__ */
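A minimal usage sketch for the kernel above, assuming the tensor is already configured and allocated; in practice a scheduler such as NEScheduler would dispatch the kernel instead of calling run() directly, and the helper name is illustrative:

    #include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"

    using namespace arm_compute;

    // Fill a two-element border around an already-allocated tensor with zeros.
    void fill_zero_border(ITensor *tensor)
    {
        NEFillBorderKernel fill_border;
        fill_border.configure(tensor, BorderSize(2), BorderMode::CONSTANT, PixelValue());
        // Run directly over the kernel's own maximum window.
        fill_border.run(fill_border.window());
    }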
diff --git a/arm_compute/core/NEON/kernels/NEFillInnerBorderKernel.h b/arm_compute/core/NEON/kernels/NEFillInnerBorderKernel.h
new file mode 100644
index 0000000000..61e6e46463
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NEFillInnerBorderKernel.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEFILLINNERBORDERKERNEL_H__
+#define __ARM_COMPUTE_NEFILLINNERBORDERKERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the kernel to fill the interior borders */
+class NEFillInnerBorderKernel : public INEKernel
+{
+public:
+    /** Default constructor */
+    NEFillInnerBorderKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEFillInnerBorderKernel(const NEFillInnerBorderKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEFillInnerBorderKernel &operator=(const NEFillInnerBorderKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    NEFillInnerBorderKernel(NEFillInnerBorderKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    NEFillInnerBorderKernel &operator=(NEFillInnerBorderKernel &&) = default;
+    /** Default destructor */
+    ~NEFillInnerBorderKernel() = default;
+
+    /** Initialise the function.
+     *
+     * @note This kernel fills the borders within the XY-planes.
+     *
+     * @param[in,out] input                 Tensor to process. Data types supported: U8/QS8/S16/S32/F32.
+     * @param[in]     border_size           Size of the border to fill in elements.
+     * @param[in]     constant_border_value (Optional) Constant value to use for the borders.
+     *
+     */
+    void configure(ITensor *input, BorderSize border_size, const PixelValue &constant_border_value = PixelValue());
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+
+private:
+    template <typename T>
+    void fill_value_single_channel(const Window &window);
+
+    ITensor   *_tensor;
+    BorderSize _border_size;
+    PixelValue _constant_border_value;
+};
+}
+#endif /*__ARM_COMPUTE_NEFILLINNERBORDERKERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h b/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h
new file mode 100644
index 0000000000..b9884ffb57
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEGEMMINTERLEAVE4x4KERNEL_H__
+#define __ARM_COMPUTE_NEGEMMINTERLEAVE4x4KERNEL_H__
+
+#include "arm_compute/core/NEON/INESimpleKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** NEON kernel to interleave the elements of a matrix
+ *
+ * This function puts the values in a 4x4 block of Matrix A on the same row (Interleaved values)
+ *
+ * @f[
+ * \left( \begin{array}{cccc}
+ * a00 & a01 & a02 & a03 \\
+ * a10 & a11 & a12 & a13 \\
+ * a20 & a21 & a22 & a23 \\
+ * a30 & a31 & a32 & a33 \\
+ * \end{array} \right)
+ * \rightarrow
+ * \left( \begin{array}{ccccccccccccccccc}
+ * a00 & a10 & a20 & a30 & a01 & a11 & a21 & a31 & a02 & a12 & a22 & a32 & a03 & a13 & a23 & a33 \\
+ * \end{array} \right)
+ * @f]
+ *
+ * After this operation, the output matrix will have the following shape: [ width * 4, ceil(height / 4.0f) ]
+ */
+class NEGEMMInterleave4x4Kernel : public INESimpleKernel
+{
+public:
+    /** Constructor */
+    NEGEMMInterleave4x4Kernel();
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  input  Input tensor. Data types supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32
+     * @param[out] output Output tensor which stores the interleaved matrix. Data type supported: same as @p input.
+     */
+    void configure(const ITensor *input, ITensor *output);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+
+private:
+    /** Common signature for all the interleave functions
+     *
+     * @param[in]  input  An input tensor. Data types supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32
+     * @param[out] output The output tensor. Data type supported: same as @p input
+     * @param[in]  window Region on which to execute the kernel.
+     */
+    using GEMMInterleaveFunction = void(const ITensor *input, ITensor *output, const Window &window);
+
+    GEMMInterleaveFunction *_func; /**< GEMM interleave function to use for the particular tensor types passed to configure() */
+};
+}
+#endif /*__ARM_COMPUTE_NEGEMMINTERLEAVE4x4KERNEL_H__*/
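The interleaving described by the formula above can be expressed in scalar form. An illustrative reference, assuming the height is a multiple of 4 (the kernel itself handles the leftover rows):

    #include <cstddef>
    #include <vector>

    // Scalar reference of the 4x4 interleave: element (r, c) of the source goes
    // to row r / 4, column c * 4 + r % 4 of the output, i.e. four source rows
    // are zipped together element by element.
    std::vector<float> interleave4x4(const std::vector<float> &in, size_t width, size_t height)
    {
        std::vector<float> out(in.size());
        for(size_t r = 0; r < height; ++r)
        {
            for(size_t c = 0; c < width; ++c)
            {
                const size_t out_row = r / 4;
                const size_t out_col = c * 4 + r % 4;
                out[out_row * width * 4 + out_col] = in[r * width + c];
            }
        }
        return out;
    }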
diff --git a/arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h b/arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h
new file mode 100644
index 0000000000..ba4dcc3373
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYKERNEL_H__
+#define __ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYKERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** NEON kernel to multiply matrices
+ *
+ * @note @ref NEGEMMLowpMatrixMultiplyKernel is a low precision matrix product kernel.
+ * This kernel performs the following computation:
+ *
+ *  -# Convert a values from uint8 to int32 and add a_offset to each of them.
+ *  -# Convert b values from uint8 to int32 and add b_offset to each of them.
+ *  -# Compute the int32 matrix product of the resulting a * b.
+ *  -# Add output_offset to each entry of the result.
+ *  -# Multiply each entry of the result by output_mult_int, shift it right by shift bits and round to the nearest integer.
+ *  -# Clamp the resulting int32 values to the [0..255] range and cast to uint8.
+ *
+ */
+class NEGEMMLowpMatrixMultiplyKernel : public INEKernel
+{
+public:
+    /** Constructor */
+    NEGEMMLowpMatrixMultiplyKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers)*/
+    NEGEMMLowpMatrixMultiplyKernel(const NEGEMMLowpMatrixMultiplyKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers)*/
+    NEGEMMLowpMatrixMultiplyKernel &operator=(const NEGEMMLowpMatrixMultiplyKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    NEGEMMLowpMatrixMultiplyKernel(NEGEMMLowpMatrixMultiplyKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    NEGEMMLowpMatrixMultiplyKernel &operator=(NEGEMMLowpMatrixMultiplyKernel &&) = default;
+    /** Initialise the kernel's input and output.
+     *
+     * The input matrices @p input0 and @p input1 must be the output of the kernels: @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel. These two
+     * kernels change the layout of the original matrices to be more cache-friendly.
+     *
+     * @param[in]  input0          Input tensor containing the interleaved Matrix A. Data type supported: U8
+     * @param[in]  input1          Input tensor containing the transposed Matrix B. Data type supported: same as @p input0
+     * @param[out] output          Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0
+     * @param[in]  a_offset        Offset to be added to each element of the matrix A.
+     * @param[in]  b_offset        Offset to be added to each element of the matrix B.
+     * @param[in]  output_offset   Offset to be added to each element of the output matrix.
+     * @param[in]  output_mult_int Value to be multiplied with each entry of the result.
+     * @param[in]  shift           Number of bits to shift the result right.
+     */
+    void configure(const ITensor *input0, const ITensor *input1, ITensor *output, int32_t a_offset, int32_t b_offset, int32_t output_offset, int32_t output_mult_int, int32_t shift);
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+
+private:
+    const ITensor *_input0;
+    const ITensor *_input1;
+    ITensor       *_output;
+    int32_t        _a_offset;
+    int32_t        _b_offset;
+    int32_t        _output_offset;
+    int32_t        _output_mult_int;
+    int32_t        _shift;
+};
+}
+#endif /*__ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYKERNEL_H__*/
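The computation steps listed above can be modelled per output element in scalar code. A sketch under stated assumptions: the rounding of the actual kernel is approximated here by a plain arithmetic shift, and the function name is illustrative:

    #include <algorithm>
    #include <cstddef>
    #include <cstdint>

    // Scalar model of the low-precision pipeline for a single output element:
    // a is one row of matrix A, b one column of matrix B, both of length k.
    uint8_t gemmlowp_ref(const uint8_t *a, const uint8_t *b, size_t k,
                         int32_t a_offset, int32_t b_offset,
                         int32_t output_offset, int32_t output_mult_int, int32_t shift)
    {
        int32_t acc = 0;
        for(size_t i = 0; i < k; ++i)
        {
            // Widen to int32 and add the per-operand offsets before multiplying.
            acc += (static_cast<int32_t>(a[i]) + a_offset) * (static_cast<int32_t>(b[i]) + b_offset);
        }
        acc = (acc + output_offset) * output_mult_int;
        acc = acc >> shift; // rounding omitted for brevity
        // Clamp to [0..255] and cast back to uint8.
        return static_cast<uint8_t>(std::max<int32_t>(0, std::min<int32_t>(255, acc)));
    }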
diff --git a/arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h b/arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h
new file mode 100644
index 0000000000..c0ecafcd39
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEGEMMMATRIXACCUMULATEBIASESKERNEL_H__
+#define __ARM_COMPUTE_NEGEMMMATRIXACCUMULATEBIASESKERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+/** NEON kernel to add a bias to each row of the input tensor */
+class NEGEMMMatrixAccumulateBiasesKernel : public INEKernel
+{
+public:
+    /** Default constructor */
+    NEGEMMMatrixAccumulateBiasesKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEGEMMMatrixAccumulateBiasesKernel(const NEGEMMMatrixAccumulateBiasesKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEGEMMMatrixAccumulateBiasesKernel &operator=(const NEGEMMMatrixAccumulateBiasesKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    NEGEMMMatrixAccumulateBiasesKernel(NEGEMMMatrixAccumulateBiasesKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    NEGEMMMatrixAccumulateBiasesKernel &operator=(NEGEMMMatrixAccumulateBiasesKernel &&) = default;
+    /** Default destructor */
+    ~NEGEMMMatrixAccumulateBiasesKernel() = default;
+    /** Set the accumulate buffer and the biases of the kernel.
+     *
+     * @param[in, out] accum  The accumulate tensor to add the biases to. Data type supported: QS8/F32
+     * @param[in]      biases The shared biases tensor to append. It must be a 1D tensor. Data type supported: Same as @p accum
+     */
+    void configure(ITensor *accum, const ITensor *biases);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+
+private:
+    ITensor       *_accum;
+    const ITensor *_biases;
+};
+}
+#endif /*__ARM_COMPUTE_NEGEMMMATRIXACCUMULATEBIASESKERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h b/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h
new file mode 100644
index 0000000000..1ab52fa2f2
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEGEMMMATRIXADDITIONKERNEL_H__ +#define __ARM_COMPUTE_NEGEMMMATRIXADDITIONKERNEL_H__ + +#include "arm_compute/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform the in-place matrix addition between 2 matrices taking into account that the second matrix might be weighted by a scalar value beta: + * + * @note [ MTX_OUT = MTX_0 + beta * MTX_1 ] with MTX_0 and MTX_1 of the same size + * + * @note This stage is used to finalize the GEMM result and it is computed if and only if beta != 0.0. In case this kernel is used for finalizing GEMM result, we have: + * - MTX_0 = A * B * alpha, where MTX_0 is the output of @ref NEGEMMMatrixMultiplyKernel + * - MTX_1 = C + */ +class NEGEMMMatrixAdditionKernel : public INESimpleKernel +{ +public: + /** Constructor */ + NEGEMMMatrixAdditionKernel(); + /** Prevent instances of this class from being copied */ + NEGEMMMatrixAdditionKernel(const NEGEMMMatrixAdditionKernel &) = delete; + /** Prevent instances of this class from being copied */ + NEGEMMMatrixAdditionKernel &operator=(const NEGEMMMatrixAdditionKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGEMMMatrixAdditionKernel(NEGEMMMatrixAdditionKernel &&) = default; + /** Allow instances of this class to be moved */ + NEGEMMMatrixAdditionKernel &operator=(NEGEMMMatrixAdditionKernel &&) = default; + /** Initialise the kernel's input and output. + * + * @note The input and output tensor must have the same dimensions + * + * @param[in] input Input tensor (Matrix C). Data types supported: QS8/F16/F32 + * @param[in, out] output Output tensor. If this kernel is used to finalize the GEMM result, output contains the result obtained by the kernel @ref NEGEMMMatrixMultiplyKernel. Data type supported: the same as @p input. + * @param[in] beta Weight of matrix C + */ + void configure(const ITensor *input, ITensor *output, float beta); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + /** Common signature for all the matrix addition functions + * + * @param[in] input An input tensor. Data types supported: QS8/F16/F32 + * @param[out] output The output tensor. Data type supported: same as @p input + * @param[in] window Region on which to execute the kernel. 
+     * @param[in]  beta   Weight of matrix C
+     */
+    using MatrixAdditionFunction = void(const ITensor *input, ITensor *output, const Window &window, float beta);
+    /** Matrix addition function to use for the particular tensor types passed to configure() */
+    MatrixAdditionFunction *_func;
+    float                   _beta;
+};
+}
+#endif /* __ARM_COMPUTE_NEGEMMMATRIXADDITIONKERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h b/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h
new file mode 100644
index 0000000000..a684945828
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEGEMMMATRIXMULTIPLYKERNEL_H__
+#define __ARM_COMPUTE_NEGEMMMATRIXMULTIPLYKERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** NEON kernel to multiply two input matrices "A" and "B". All elements of the output matrix/vector will be multiplied by alpha after the matrix multiplication
+ *
+ * @note If the output tensor is a matrix, the implementation assumes that the input tensors @p input0 and @p input1 are both matrices and reshaped respectively with @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel
+ * @note If the output tensor is a vector and the data type is F32, the implementation assumes that the first input tensor @p input0 is a vector and the second input tensor @p input1 a matrix. The implementation also assumes that both tensors have not been reshaped
+ *
+ */
+class NEGEMMMatrixMultiplyKernel : public INEKernel
+{
+public:
+    /** Constructor */
+    NEGEMMMatrixMultiplyKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEGEMMMatrixMultiplyKernel(const NEGEMMMatrixMultiplyKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEGEMMMatrixMultiplyKernel &operator=(const NEGEMMMatrixMultiplyKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    NEGEMMMatrixMultiplyKernel(NEGEMMMatrixMultiplyKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    NEGEMMMatrixMultiplyKernel &operator=(NEGEMMMatrixMultiplyKernel &&) = default;
+    /** Initialise the kernel's input and output.
+     *
+     * @note If the output tensor is a matrix, the input matrices @p input0 and @p input1 should be the output of the kernels: @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel
+     *       These two kernels change the layout of the original matrices to be more cache-friendly.
+     *
+     * @param[in]  input0 Input tensor containing the interleaved Matrix A or the vector A. Data types supported: F16/F32
+     * @param[in]  input1 Input tensor containing the transposed Matrix B if the first input tensor A is not a vector.
+     *                    If the output tensor is a vector, input1 must contain the matrix B not reshaped. Data type supported: same as @p input0
+     * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0.
+     * @param[in]  alpha  Weight of the matrix product
+     */
+    void configure(const ITensor *input0, const ITensor *input1, ITensor *output, float alpha);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+
+private:
+    const ITensor *_input0;
+    const ITensor *_input1;
+    ITensor       *_output;
+    float          _alpha;
+};
+}
+#endif /*__ARM_COMPUTE_NEGEMMMATRIXMULTIPLYKERNEL_H__*/
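Taken together, the GEMM kernels above cooperate as sketched below to compute D = alpha * A * B + beta * C, mirroring the kernel docs: interleave A, transpose B, multiply with alpha, then finalize with the matrix addition when beta != 0. This is a configuration sketch, not the library's NEGEMM function; the helper name and tensor arguments are assumptions, and allocation/shapes are left to the caller:

    #include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
    #include "arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h"
    #include "arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h"
    #include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"

    using namespace arm_compute;

    // Wire up the four kernels for D = alpha * A * B + beta * C. The reshaped
    // tensors a_interleaved and b_transposed must be shaped as the kernel docs
    // describe; the kernels outlive this call and are run later by a scheduler.
    void gemm_configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *d,
                        ITensor *a_interleaved, ITensor *b_transposed, float alpha, float beta,
                        NEGEMMInterleave4x4Kernel &interleave, NEGEMMTranspose1xWKernel &transpose,
                        NEGEMMMatrixMultiplyKernel &multiply, NEGEMMMatrixAdditionKernel &add)
    {
        interleave.configure(a, a_interleaved); // reshape A into 4x4-interleaved form
        transpose.configure(b, b_transposed);   // reshape B into 1xW-transposed form
        multiply.configure(a_interleaved, b_transposed, d, alpha);
        if(beta != 0.f)
        {
            add.configure(c, d, beta);          // d += beta * c, in place on d
        }
    }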
diff --git a/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h b/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h
new file mode 100644
index 0000000000..5d8a3697cb
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEGEMMTRANSPOSE1xWKERNEL_H__
+#define __ARM_COMPUTE_NEGEMMTRANSPOSE1xWKERNEL_H__
+
+#include "arm_compute/core/NEON/INESimpleKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** NEON kernel which transposes the elements of a matrix in chunks of 1xW, where W is equal to (16 / element size of the tensor)
+ *
+ * The following example shows how the transposition 1xW works when the input data type is F32
+ *
+ * @f[
+ * \left( \begin{array}{cccc}
+ * a00 & a01 & a02 & a03 \\
+ * a10 & a11 & a12 & a13 \\
+ * a20 & a21 & a22 & a23 \\
+ * a30 & a31 & a32 & a33 \\
+ * \end{array} \right)
+ * \rightarrow
+ * \left( \begin{array}{ccccccccccccccccc}
+ * a00 & a01 & a02 & a03 & a10 & a11 & a12 & a13 & a20 & a21 & a22 & a23 & a30 & a31 & a32 & a33 \\
+ * \end{array} \right)
+ * @f]
+ *
+ * The following example shows how the transposition 1xW works when the input data type is F16
+ *
+ * @f[
+ * \left( \begin{array}{cccccccc}
+ * a00 & a01 & a02 & a03 & a04 & a05 & a06 & a07 \\
+ * a10 & a11 & a12 & a13 & a14 & a15 & a16 & a17 \\
+ * a20 & a21 & a22 & a23 & a24 & a25 & a26 & a27 \\
+ * a30 & a31 & a32 & a33 & a34 & a35 & a36 & a37 \\
+ * \end{array} \right)
+ * \rightarrow
+ * \left( \begin{array}{cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc}
+ * a00 & a01 & a02 & a03 & a04 & a05 & a06 & a07 & a10 & a11 & a12 & a13 & a14 & a15 & a16 & a17 & a20 & a21 & a22 & a23 & a24 & a25 & a26 & a27 & a30 & a31 & a32 & a33 & a34 & a35 & a36 & a37\\
+ * \end{array} \right)
+ * @f]
+ *
+ * @note The output matrix will have the following shape: [ height * W, ceil(width / W) ], where W = (16 / element size of the tensor)
+ *
+ */
+class NEGEMMTranspose1xWKernel : public INESimpleKernel
+{
+public:
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  input  Input tensor. Data types supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32
+     * @param[out] output Output tensor. Data type supported: same as @p input.
+     */
+    void configure(const ITensor *input, ITensor *output);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+};
+}
+#endif /*__ARM_COMPUTE_NEGEMMTRANSPOSE1xWKERNEL_H__ */
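The 1xW transposition above can be expressed in scalar form for F32, where W = 16 / sizeof(float) = 4. An illustrative reference, not the NEON implementation:

    #include <cstddef>
    #include <vector>

    // Scalar reference of the 1xW transposition for F32 (W = 4): each 1x4 chunk
    // of a source row becomes a 4-element block placed along the destination row
    // selected by the chunk's column index. Output shape: [height * W, ceil(width / W)].
    std::vector<float> transpose1xW(const std::vector<float> &in, size_t width, size_t height)
    {
        const size_t W          = 4;
        const size_t out_width  = height * W;
        const size_t out_height = (width + W - 1) / W;
        std::vector<float> out(out_width * out_height, 0.f);
        for(size_t r = 0; r < height; ++r)
        {
            for(size_t c = 0; c < width; ++c)
            {
                const size_t out_row = c / W;
                const size_t out_col = r * W + c % W;
                out[out_row * out_width + out_col] = in[r * width + c];
            }
        }
        return out;
    }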
diff --git a/arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h b/arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h
new file mode 100644
index 0000000000..763fab88f6
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEGAUSSIAN3x3KERNEL_H__
+#define __ARM_COMPUTE_NEGAUSSIAN3x3KERNEL_H__
+
+#include "arm_compute/core/NEON/INESimpleKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** NEON kernel to perform a Gaussian 3x3 filter */
+class NEGaussian3x3Kernel : public INESimpleKernel
+{
+public:
+    /** Set the source, destination and border mode of the kernel
+     *
+     * @param[in]  input            Source tensor. Data type supported: U8
+     * @param[out] output           Destination tensor. Data type supported: same as @p input
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(const ITensor *input, ITensor *output, bool border_undefined);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+    BorderSize border_size() const override;
+};
+}
+#endif /*__ARM_COMPUTE_NEGAUSSIAN3x3KERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h b/arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h
new file mode 100644
index 0000000000..86b28907da
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEGAUSSIAN5x5KERNEL_H__
+#define __ARM_COMPUTE_NEGAUSSIAN5x5KERNEL_H__
+
+#include "arm_compute/core/NEON/INESimpleKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** NEON kernel to perform a Gaussian 5x5 filter (horizontal pass) */
+class NEGaussian5x5HorKernel : public INESimpleKernel
+{
+public:
+    /** Default constructor */
+    NEGaussian5x5HorKernel();
+
+    /** Initialise the kernel's source, destination and border mode.
+     *
+     * @param[in]  input            Source tensor. Data type supported: U8.
+     * @param[out] output           Destination tensor. Data type supported: S16.
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(const ITensor *input, ITensor *output, bool border_undefined);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+    BorderSize border_size() const override;
+
+private:
+    BorderSize _border_size;
+};
+
+/** NEON kernel to perform a Gaussian 5x5 filter (vertical pass) */
+class NEGaussian5x5VertKernel : public INESimpleKernel
+{
+public:
+    /** Initialise the kernel's source, destination and border mode.
+     *
+     * @param[in]  input            Source tensor. Data type supported: S16.
+     * @param[out] output           Destination tensor. Data type supported: U8.
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(const ITensor *input, ITensor *output, bool border_undefined);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+    BorderSize border_size() const override;
+};
+}
+#endif /*__ARM_COMPUTE_NEGAUSSIAN5x5KERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h b/arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h
new file mode 100644
index 0000000000..40a6aa7375
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEGAUSSIANPYRAMIDKERNEL_H__
+#define __ARM_COMPUTE_NEGAUSSIANPYRAMIDKERNEL_H__
+
+#include "arm_compute/core/NEON/INESimpleKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** NEON kernel to perform a GaussianPyramid (horizontal pass) */
+class NEGaussianPyramidHorKernel : public INESimpleKernel
+{
+public:
+    /** Default constructor */
+    NEGaussianPyramidHorKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEGaussianPyramidHorKernel(NEGaussianPyramidHorKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEGaussianPyramidHorKernel &operator=(NEGaussianPyramidHorKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    NEGaussianPyramidHorKernel(NEGaussianPyramidHorKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    NEGaussianPyramidHorKernel &operator=(NEGaussianPyramidHorKernel &&) = default;
+    /** Default destructor */
+    ~NEGaussianPyramidHorKernel() = default;
+
+    /** Initialise the kernel's source, destination and border mode.
+     *
+     * @param[in]  input            Source tensor. Data type supported: U8.
+     * @param[out] output           Destination tensor. Data type supported: S16.
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(const ITensor *input, ITensor *output, bool border_undefined);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+    BorderSize border_size() const override;
+
+private:
+    BorderSize _border_size;
+    int        _l2_load_offset;
+};
+
+/** NEON kernel to perform a GaussianPyramid (vertical pass) */
+class NEGaussianPyramidVertKernel : public INESimpleKernel
+{
+public:
+    /** Default constructor */
+    NEGaussianPyramidVertKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEGaussianPyramidVertKernel(NEGaussianPyramidVertKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEGaussianPyramidVertKernel &operator=(NEGaussianPyramidVertKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    NEGaussianPyramidVertKernel(NEGaussianPyramidVertKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    NEGaussianPyramidVertKernel &operator=(NEGaussianPyramidVertKernel &&) = default;
+    /** Default destructor */
+    ~NEGaussianPyramidVertKernel() = default;
+
+    /** Initialise the kernel's source, destination and border mode.
+     *
+     * @param[in]  input            Source tensor. Data type supported: S16.
+     * @param[out] output           Destination tensor. Data type supported: U8.
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(const ITensor *input, ITensor *output, bool border_undefined);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+    BorderSize border_size() const override;
+
+private:
+    int _t2_load_offset;
+};
+}
+#endif /*__ARM_COMPUTE_NEGAUSSIANPYRAMIDKERNEL_H__ */
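A wiring sketch for the separable 5x5 Gaussian above: the horizontal pass widens U8 pixels to S16 intermediates and the vertical pass reduces them back to U8. The helper name and the intermediate tensor are assumptions for illustration; allocation is left to the caller:

    #include "arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h"

    using namespace arm_compute;

    // Chain the two separable passes; tmp_s16 holds the S16 intermediates.
    void gaussian5x5_configure(const ITensor *src, ITensor *tmp_s16, ITensor *dst,
                               NEGaussian5x5HorKernel &hor, NEGaussian5x5VertKernel &vert,
                               bool border_undefined)
    {
        hor.configure(src, tmp_s16, border_undefined);  // U8 -> S16
        vert.configure(tmp_s16, dst, border_undefined); // S16 -> U8
    }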
diff --git a/arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h b/arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h
new file mode 100644
index 0000000000..dd85778b8a
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEHOGDESCRIPTORKERNEL_H__
+#define __ARM_COMPUTE_NEHOGDESCRIPTORKERNEL_H__
+
+#include "arm_compute/core/IHOG.h"
+#include "arm_compute/core/NEON/INEKernel.h"
+#include "arm_compute/core/Size2D.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** NEON kernel to perform HOG Orientation Binning */
+class NEHOGOrientationBinningKernel : public INEKernel
+{
+public:
+    /** Default constructor */
+    NEHOGOrientationBinningKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEHOGOrientationBinningKernel(const NEHOGOrientationBinningKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEHOGOrientationBinningKernel &operator=(const NEHOGOrientationBinningKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    NEHOGOrientationBinningKernel(NEHOGOrientationBinningKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    NEHOGOrientationBinningKernel &operator=(NEHOGOrientationBinningKernel &&) = default;
+    /** Default destructor */
+    ~NEHOGOrientationBinningKernel() = default;
+
+    /** Initialise the kernel's inputs, output and HOG's metadata
+     *
+     * @param[in]  input_magnitude Input tensor which stores the magnitude of the gradient for each pixel. Data type supported: S16.
+     * @param[in]  input_phase     Input tensor which stores the phase of the gradient for each pixel. Data type supported: U8
+     * @param[out] output          Output tensor which stores the local HOG for each cell. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per cell
+     * @param[in]  hog_info        HOG's metadata
+     */
+    void configure(const ITensor *input_magnitude, const ITensor *input_phase, ITensor *output, const HOGInfo *hog_info);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+
+private:
+    /** Common signature for all the specialised orientation binning functions
+     *
+     * @param[in]  mag_row_ptr   Pointer to the first row of the cell in the magnitude tensor
+     * @param[in]  phase_row_ptr Pointer to the first row of the cell in the phase tensor
+     * @param[out] output_ptr    Pointer to the output cell of the hog space tensor
+     * @param[in]  mag_stride    Stride of the magnitude tensor
+     * @param[in]  phase_stride  Stride of the phase tensor
+     * @param[in]  cell_width    Width of the cell
+     * @param[in]  cell_height   Height of the cell
+     * @param[in]  num_bins      Number of bins for each cell
+     * @param[in]  phase_scale   Scale factor to apply to the phase in order to calculate the histogram index
+     */
+    using OrientBinFunc = void(const int16_t *__restrict mag_row_ptr, const uint8_t *__restrict phase_row_ptr, float *__restrict output_ptr, size_t mag_stride, size_t phase_stride, size_t cell_width,
+                               size_t cell_height, size_t num_bins, float phase_scale);
+    /** Orientation binning function to use for the particular cell width passed to configure() */
+    OrientBinFunc *_func;
+    const ITensor *_input_magnitude;
+    const ITensor *_input_phase;
+    ITensor       *_output;
+    size_t         _cell_width;
+    size_t         _cell_height;
+    size_t         _num_bins;
+    float          _phase_scale;
+};
+
+/** NEON kernel to perform HOG block normalization */
+class NEHOGBlockNormalizationKernel : public INEKernel
+{
+public:
+    /** Default constructor */
+    NEHOGBlockNormalizationKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEHOGBlockNormalizationKernel(const NEHOGBlockNormalizationKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEHOGBlockNormalizationKernel &operator=(const NEHOGBlockNormalizationKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    NEHOGBlockNormalizationKernel(NEHOGBlockNormalizationKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    NEHOGBlockNormalizationKernel &operator=(NEHOGBlockNormalizationKernel &&) = default;
+    /** Default destructor */
+    ~NEHOGBlockNormalizationKernel() = default;
+
+    /** Initialise the kernel's input, output and HOG's metadata
+     *
+     * @param[in]  input    Input tensor which stores the local HOG for each cell. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per cell
+     * @param[out] output   Output tensor which stores the normalised blocks. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block
+     * @param[in]  hog_info HOG's metadata
+     */
+    void configure(const ITensor *input, ITensor *output, const HOGInfo *hog_info);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+
+private:
+    /** Common signature for all the specialised block normalization functions
+     *
+     * @param[in]  input_row_ptr              Pointer to the first row of the block in the input hog space tensor
+     * @param[out] output_ptr                 Pointer to the output block of the hog normalized space
+     * @param[in]  input_stride               Stride of the input hog space tensor
+     * @param[in]  num_cells_per_block_height Number of cells per block along the Y direction
+     * @param[in]  num_bins_block_x           Number of bins per block along the X direction
+     * @param[in]  num_bins_block             Number of total bins per block
+     * @param[in]  l2_hyst_threshold          Threshold to use for l2 hysteresis normalization
+     */
+    using BlockNormFunc = void(const float *input_row_ptr, float *output_ptr, size_t input_stride, size_t num_cells_per_block_height, size_t num_bins_block_x, size_t num_bins_block,
+                               float l2_hyst_threshold);
+    /** Block normalization function to use for the particular normalization type passed to configure() */
+    BlockNormFunc *_func;
+    const ITensor *_input;
+    ITensor       *_output;
+    Size2D         _num_cells_per_block;
+    Size2D         _num_cells_per_block_stride;
+    size_t         _num_bins;
+    float          _l2_hyst_threshold;
+};
+}
+#endif /* __ARM_COMPUTE_NEHOGDESCRIPTORKERNEL_H__ */
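The orientation binning step documented above can be modelled in scalar form: each pixel votes into the histogram bin selected by its scaled phase, weighted by its magnitude. Bin interpolation, which HOG implementations often apply, is omitted, and the function is illustrative only:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Scalar sketch of orientation binning for one cell: the bin index comes
    // from the phase scaled by phase_scale, and the vote is the magnitude.
    void orient_bin_cell(const int16_t *mag, const uint8_t *phase, size_t stride,
                         size_t cell_w, size_t cell_h, size_t num_bins, float phase_scale,
                         std::vector<float> &hist)
    {
        hist.assign(num_bins, 0.f);
        for(size_t y = 0; y < cell_h; ++y)
        {
            for(size_t x = 0; x < cell_w; ++x)
            {
                const size_t bin = static_cast<size_t>(phase[y * stride + x] * phase_scale) % num_bins;
                hist[bin] += static_cast<float>(mag[y * stride + x]);
            }
        }
    }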
diff --git a/arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h b/arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h
new file mode 100644
index 0000000000..e56d1e5fd8
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEHOGDETECTORKERNEL_H__
+#define __ARM_COMPUTE_NEHOGDETECTORKERNEL_H__
+
+#include "arm_compute/core/IArray.h"
+#include "arm_compute/core/IHOG.h"
+#include "arm_compute/core/NEON/INEKernel.h"
+
+#include <mutex>
+
+namespace arm_compute
+{
+class ITensor;
+
+/** NEON kernel to perform HOG detection using linear SVM */
+class NEHOGDetectorKernel : public INEKernel
+{
+public:
+    /** Default constructor */
+    NEHOGDetectorKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEHOGDetectorKernel(const NEHOGDetectorKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEHOGDetectorKernel &operator=(const NEHOGDetectorKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    NEHOGDetectorKernel(NEHOGDetectorKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    NEHOGDetectorKernel &operator=(NEHOGDetectorKernel &&) = default;
+    /** Default destructor */
+    ~NEHOGDetectorKernel() = default;
+
+    /** Initialise the kernel's input, HOG data-object, detection window, the stride of the detection window, the threshold and index of the object to detect
+     *
+     * @param[in]  input                   Input tensor which stores the HOG descriptor obtained with @ref NEHOGOrientationBinningKernel. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block
+     * @param[in]  hog                     HOG data object used by @ref NEHOGOrientationBinningKernel and @ref NEHOGBlockNormalizationKernel
+     * @param[out] detection_windows       Array of @ref DetectionWindow. This array stores all the detected objects
+     * @param[in]  detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions.
+     *                                     It must be a multiple of the hog->info()->block_stride()
+     * @param[in]  threshold               (Optional) Threshold for the distance between features and SVM classifying plane
+     * @param[in]  idx_class               (Optional) Index of the class used for evaluating which class the detection window belongs to
+     */
+    void configure(const ITensor *input, const IHOG *hog, IDetectionWindowArray *detection_windows, const Size2D &detection_window_stride, float threshold = 0.0f, uint16_t idx_class = 0);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+
+private:
+    const ITensor         *_input;
+    IDetectionWindowArray *_detection_windows;
+    const float           *_hog_descriptor;
+    float                  _bias;
+    float                  _threshold;
+    uint16_t               _idx_class;
+    size_t                 _num_bins_per_descriptor_x;
+    size_t                 _num_blocks_per_descriptor_y;
+    size_t                 _block_stride_width;
+    size_t                 _block_stride_height;
+    size_t                 _detection_window_width;
+    size_t                 _detection_window_height;
+    size_t                 _max_num_detection_windows;
+    std::mutex             _mutex;
+};
+}
+
+#endif /* __ARM_COMPUTE_NEHOGDETECTORKERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h b/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h
new file mode 100644
index 0000000000..0abd73ef97
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEHARRISCORNERSKERNEL_H__
+#define __ARM_COMPUTE_NEHARRISCORNERSKERNEL_H__
+
+#include "arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h"
+#include "arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h"
+#include "arm_compute/core/IArray.h"
+#include "arm_compute/core/NEON/INEKernel.h"
+
+#include <cstdint>
+#include <mutex>
+
+namespace arm_compute
+{
+class ITensor;
+using IImage = ITensor;
+
+/** Common interface for all Harris Score kernels */
+class INEHarrisScoreKernel : public INEKernel
+{
+public:
+    /** Default constructor */
+    INEHarrisScoreKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    INEHarrisScoreKernel(const INEHarrisScoreKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    INEHarrisScoreKernel &operator=(const INEHarrisScoreKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    INEHarrisScoreKernel(INEHarrisScoreKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    INEHarrisScoreKernel &operator=(INEHarrisScoreKernel &&) = default;
+    /** Default destructor */
+    ~INEHarrisScoreKernel() = default;
+
+public:
+    /** Setup the kernel parameters
+     *
+     * @param[in]  input1           Source image (gradient X). Data types supported: S16/S32
+     * @param[in]  input2           Source image (gradient Y). Data types supported: same as @p input1
+     * @param[out] output           Destination image (harris score). Data types supported: F32
+     * @param[in]  norm_factor      Normalization factor to use accordingly with the gradient size (Must be different from 0)
+     * @param[in]  strength_thresh  Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel).
+     * @param[in]  sensitivity      Sensitivity threshold k from the Harris-Stephens equation
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    virtual void configure(const IImage *input1, const IImage *input2, IImage *output, float norm_factor, float strength_thresh, float sensitivity, bool border_undefined) = 0;
+
+protected:
+    const IImage *_input1;          /**< Source image - Gx component */
+    const IImage *_input2;          /**< Source image - Gy component */
+    IImage       *_output;          /**< Destination image - Harris score */
+    float         _sensitivity;     /**< Sensitivity value */
+    float         _strength_thresh; /**< Threshold value */
+    float         _norm_factor;     /**< Normalization factor */
+    BorderSize    _border_size;     /**< Border size */
+};
+
+/** Template NEON kernel to perform Harris Score.
+ *  The implementation supports 3, 5, and 7 for the block_size
+ */
+template <int32_t block_size>
+class NEHarrisScoreKernel : public INEHarrisScoreKernel
+{
+public:
+    /** Default constructor */
+    NEHarrisScoreKernel();
+    // Inherited methods overridden:
+    void configure(const IImage *input1, const IImage *input2, IImage *output, float norm_factor, float strength_thresh, float sensitivity, bool border_undefined) override;
+    BorderSize border_size() const override;
+    void run(const Window &window) override;
+
+private:
+    /** Common signature for all the specialised harris score functions */
+    using HarrisScoreFunction = void(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, int32_t input_stride,
+                                     float norm_factor, float sensitivity, float strength_thresh);
+    /** Harris Score function to use for the particular image types passed to configure() */
+    HarrisScoreFunction *_func;
+};
+
+#ifdef ARM_COMPUTE_ENABLE_FP16
+/** Template NEON kernel to perform Harris Score using F16 */
+template <int32_t block_size>
+class NEHarrisScoreFP16Kernel : public INEHarrisScoreKernel
+{
+public:
+    /** Default constructor */
+    NEHarrisScoreFP16Kernel();
+    // Inherited methods overridden:
+    void configure(const IImage *input1, const IImage *input2, IImage *output, float norm_factor, float strength_thresh, float sensitivity, bool border_undefined) override;
+    BorderSize border_size() const override;
+    void run(const Window &window) override;
+
+private:
+    using HarrisScoreFunction = void(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, int32_t input_stride,
+                                     float norm_factor, float sensitivity, float strength_thresh);
+    /** Harris Score function to use for the particular image types passed to configure() */
+    HarrisScoreFunction *_func;
+};
+#else
+template <int32_t block_size>
+using NEHarrisScoreFP16Kernel = NEHarrisScoreKernel<block_size>;
+#endif
+}
+#endif /* __ARM_COMPUTE_NEHARRISCORNERSKERNEL_H__ */
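The response computed by the kernels above follows the Harris-Stephens equation referenced in the configure() docs: score = det(M) - k * trace(M)^2, where M is built from block sums of the gradient products and k is the sensitivity. A scalar sketch, with the block accumulation and the norm_factor pre-scaling of the gradients left to the caller:

    // gx2 = sum of Gx*Gx, gy2 = sum of Gy*Gy, gxgy = sum of Gx*Gy over the block.
    // Scores below strength_thresh are eliminated, as the configure() docs state.
    float harris_score(float gx2, float gy2, float gxgy, float sensitivity, float strength_thresh)
    {
        const float det   = gx2 * gy2 - gxgy * gxgy;
        const float trace = gx2 + gy2;
        const float score = det - sensitivity * trace * trace;
        return (score > strength_thresh) ? score : 0.f;
    }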
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEHISTOGRAMKERNEL_H__
+#define __ARM_COMPUTE_NEHISTOGRAMKERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <mutex>
+
+namespace arm_compute
+{
+class IDistribution1D;
+class ITensor;
+using IImage = ITensor;
+
+/** Interface for the histogram kernel */
+class NEHistogramKernel : public INEKernel
+{
+public:
+ /** Default constructor */
+ NEHistogramKernel();
+ /** Default destructor */
+ ~NEHistogramKernel() = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEHistogramKernel(const NEHistogramKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEHistogramKernel &operator=(const NEHistogramKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEHistogramKernel(NEHistogramKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEHistogramKernel &operator=(NEHistogramKernel &&) = default;
+
+ /** Set the input image and the distribution output.
+ *
+ * @param[in] input Source image. Data type supported: U8.
+ * @param[out] output Destination distribution.
+ * @param[in,out] local_hist Array that the threads use to save their local histograms.
+ * Its size should be equal to (number_of_threads * num_bins),
+ * and the Window::thread_id() is used to determine the part of the array
+ * used by each thread.
+ * @param[out] window_lut LUT with pre-calculated possible window values.
+ * The size of the LUT should be equal to max_range_size and it will be filled
+ * during the configure stage, while it is re-used in every run and can therefore be
+ * safely shared among threads.
+ */
+ void configure(const IImage *input, IDistribution1D *output, uint32_t *local_hist, uint32_t *window_lut);
+ /** Set the input image and the distribution output.
+ *
+ * @note Used for histogram of fixed size equal to 256
+ *
+ * @param[in] input Source image. Data type supported: U8.
+ * @param[out] output Destination distribution, which must have 256 bins.
+ */
+ void configure(const IImage *input, IDistribution1D *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window) override;
+
+private:
+ /** Function to merge multiple partial histograms.
+ *
+ * @param[out] global_hist Pointer to the final histogram.
+ * @param[in] local_hist Pointer to the partial histograms.
+ * @param[in] bins Number of bins.
+ */
+ void merge_histogram(uint32_t *global_hist, const uint32_t *local_hist, size_t bins);
+ /** Function to merge multiple minimum values of partial histograms.
+ *
+ * @param[out] global_min Pointer to the global min value.
+ * @param[in] local_min Local min value.
+ */
+ void merge_min(uint8_t *global_min, const uint8_t &local_min);
+ /** Function to perform histogram on the given window
+ *
+ * @param[in] win Region on which to execute the kernel
+ */
+ void histogram_U8(Window win);
+ /** Function to perform histogram on the given window where histogram is
+ * of fixed size 256 without ranges and offsets.
+ *
+ * @param[in] win Region on which to execute the kernel
+ */
+ void histogram_fixed_U8(Window win);
+ /** Pre-calculate the pixel windowing for every possible pixel
+ *
+ * Calculate (V - offset) * numBins / range where V is every possible pixel value.
+ *
+ * @note We currently support U8 images, thus possible pixel values are between 0 and 255
+ */
+ void calculate_window_lut() const;
+ /** Common signature for all the specialised Histogram functions
+ *
+ * @param[in] window Region on which to execute the kernel.
+ */
+ using HistogramFunctionPtr = void (NEHistogramKernel::*)(Window window);
+
+ HistogramFunctionPtr _func; ///< Histogram function to use for the particular image types passed to configure()
+ const IImage *_input;
+ IDistribution1D *_output;
+ uint32_t *_local_hist;
+ uint32_t *_window_lut;
+ std::mutex _hist_mtx;
+ static constexpr unsigned int _max_range_size{ 256 }; ///< 256 possible pixel values as we handle only U8 images
+};
+}
+#endif /*__ARM_COMPUTE_NEHISTOGRAMKERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NEIm2ColKernel.h b/arm_compute/core/NEON/kernels/NEIm2ColKernel.h
new file mode 100644
index 0000000000..ebaafb467f
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NEIm2ColKernel.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEIM2COLKERNEL_H__
+#define __ARM_COMPUTE_NEIM2COLKERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the im2col reshape kernel.
+ *
+ * Rearranges image blocks into columns. It is used to strip out each convolution block into a single column.
+ * It is used to transform a convolution to a plain matrix multiplication.
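A small sketch of the window LUT described in calculate_window_lut() above: every possible U8 value V maps to bin (V - offset) * num_bins / range. The clamping of values below the offset is an assumption of this sketch.

```cpp
#include <cstdint>
#include <vector>

// Illustrative equivalent of the pre-calculated window LUT (hypothetical helper).
std::vector<uint32_t> make_window_lut(uint32_t num_bins, uint32_t offset, uint32_t range)
{
    std::vector<uint32_t> lut(256); // U8 input: 256 possible pixel values
    for(uint32_t v = 0; v < 256; ++v)
    {
        lut[v] = (v < offset) ? 0 : (v - offset) * num_bins / range;
    }
    return lut;
}
```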
+ *
+ * For example, taking into account the image below and assuming 3x3 image blocks with a stride of 1, we have:
+ *
+ * @f[
+ * \left( \begin{array}{cccc}
+ * a00 & a01 & a02 & a03 \\
+ * a10 & a11 & a12 & a13 \\
+ * a20 & a21 & a22 & a23 \\
+ * a30 & a31 & a32 & a33 \\
+ * \end{array} \right)
+ * \rightarrow
+ * \left( \begin{array}{ccccccccc}
+ * a00 & a01 & a02 & a10 & a11 & a12 & a20 & a21 & a22 \\
+ * a01 & a02 & a03 & a11 & a12 & a13 & a21 & a22 & a23 \\
+ * a10 & a11 & a12 & a20 & a21 & a22 & a30 & a31 & a32 \\
+ * a11 & a12 & a13 & a21 & a22 & a23 & a31 & a32 & a33 \\
+ * \end{array} \right)
+ * @f]
+ */
+class NEIm2ColKernel : public INEKernel
+{
+public:
+ /** Default constructor */
+ NEIm2ColKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEIm2ColKernel(const NEIm2ColKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEIm2ColKernel &operator=(const NEIm2ColKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEIm2ColKernel(NEIm2ColKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEIm2ColKernel &operator=(NEIm2ColKernel &&) = default;
+ /** Default destructor */
+ ~NEIm2ColKernel() = default;
+
+ /** Set the input and output of the kernel.
+ *
+ * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
+ * while every optional dimension from 4 and above represents a batch of inputs. Data types supported: QS8/F32
+ * @param[out] output The output tensor. Data types supported: Same as @p input
+ * @param[in] convolved_dims The convolved output dimensions.
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ * @param[in] has_bias In case biases are provided, expands the matrix with an extra element set to 1.
+ */
+ void configure(const ITensor *input, ITensor *output, std::pair<unsigned int, unsigned int> convolved_dims, const PadStrideInfo &conv_info, bool has_bias);
+
+ // Inherited methods overridden:
+ void run(const Window &window) override;
+
+private:
+ /** Template function to run the im2col optimised for the fully connected layer case
+ *
+ * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
+ */
+ template <typename T>
+ void run_reduced(const Window &window);
+ /** Template function to run the im2col used for the convolution layer case
+ *
+ * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
+ */
+ template <typename T>
+ void run_generic(const Window &window);
+ /** Common signature for all the specialised im2col functions
+ *
+ * @param[in] window Region on which to execute the kernel.
+ */
+ using Im2ColFunctionPtr = void (NEIm2ColKernel::*)(const Window &window);
+
+ Im2ColFunctionPtr _func;
+ const ITensor *_input;
+ ITensor *_output;
+ std::pair<unsigned int, unsigned int> _convolved_dims;
+ PadStrideInfo _conv_info;
+ unsigned int _kernel_size;
+ bool _has_bias;
+};
+}
+#endif /*__ARM_COMPUTE_NEIM2COLKERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NEIntegralImageKernel.h b/arm_compute/core/NEON/kernels/NEIntegralImageKernel.h
new file mode 100644
index 0000000000..13647889ab
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NEIntegralImageKernel.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
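A scalar reference of the rearrangement shown in the im2col comment above (hypothetical helper, not the kernel itself): 3x3 blocks at stride 1 over a 4x4 single-channel image, one block per output row.

```cpp
#include <vector>

std::vector<std::vector<float>> im2col_3x3_stride1(const float img[4][4])
{
    std::vector<std::vector<float>> rows;
    for(int y = 0; y <= 1; ++y) // 4 - 3 = 1: two valid vertical offsets
    {
        for(int x = 0; x <= 1; ++x)
        {
            std::vector<float> row;
            for(int ky = 0; ky < 3; ++ky)
            {
                for(int kx = 0; kx < 3; ++kx)
                {
                    row.push_back(img[y + ky][x + kx]);
                }
            }
            rows.push_back(row); // 9 elements, matching one row of the matrix above
        }
    }
    return rows; // 4 rows of 9 elements
}
```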
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEINTEGRALIMAGEKERNEL_H__
+#define __ARM_COMPUTE_NEINTEGRALIMAGEKERNEL_H__
+
+#include "arm_compute/core/NEON/INESimpleKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Kernel to compute the integral image of an image */
+class NEIntegralImageKernel : public INESimpleKernel
+{
+public:
+ /** Set the source and destination of the kernel
+ *
+ * @param[in] input Source tensor. Data type supported: U8
+ * @param[out] output Destination tensor. Data type supported: U32
+ */
+ void configure(const ITensor *input, ITensor *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window) override;
+ BorderSize border_size() const override;
+ bool is_parallelisable() const override;
+};
+}
+#endif /*__ARM_COMPUTE_NEINTEGRALIMAGEKERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NELKTrackerKernel.h b/arm_compute/core/NEON/kernels/NELKTrackerKernel.h
new file mode 100644
index 0000000000..9ab7f91092
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NELKTrackerKernel.h
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
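The recurrence behind NEIntegralImageKernel, as a scalar reference (illustrative only): each U32 output pixel holds the sum of all U8 input pixels above and to its left.

```cpp
#include <cstdint>

void integral_image_reference(const uint8_t *src, uint32_t *dst, int width, int height)
{
    for(int y = 0; y < height; ++y)
    {
        for(int x = 0; x < width; ++x)
        {
            const uint32_t left    = (x > 0) ? dst[y * width + (x - 1)] : 0;
            const uint32_t up      = (y > 0) ? dst[(y - 1) * width + x] : 0;
            const uint32_t up_left = (x > 0 && y > 0) ? dst[(y - 1) * width + (x - 1)] : 0;
            // I(x, y) = i(x, y) + I(x-1, y) + I(x, y-1) - I(x-1, y-1)
            dst[y * width + x] = src[y * width + x] + left + up - up_left;
        }
    }
}
```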
+ */
+#ifndef __ARM_COMPUTE_NELKTRACKERKERNEL_H__
+#define __ARM_COMPUTE_NELKTRACKERKERNEL_H__
+
+#include "arm_compute/core/IArray.h"
+#include "arm_compute/core/NEON/INEKernel.h"
+#include "arm_compute/core/Types.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <tuple>
+#include <utility>
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Internal keypoint class for Lucas-Kanade Optical Flow */
+struct NELKInternalKeypoint
+{
+ float x{ 0.f }; /**< x coordinate of the keypoint */
+ float y{ 0.f }; /**< y coordinate of the keypoint */
+ bool tracking_status{ false }; /**< the tracking status of the keypoint */
+};
+
+using INELKInternalKeypointArray = IArray<NELKInternalKeypoint>;
+
+/** Interface for the Lucas-Kanade tracker kernel */
+class NELKTrackerKernel : public INEKernel
+{
+public:
+ /** Default constructor */
+ NELKTrackerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NELKTrackerKernel(const NELKTrackerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NELKTrackerKernel &operator=(const NELKTrackerKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NELKTrackerKernel(NELKTrackerKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NELKTrackerKernel &operator=(NELKTrackerKernel &&) = default;
+ /** Default destructor */
+ ~NELKTrackerKernel() = default;
+
+ /** Initialise the kernel input and output
+ *
+ * @param[in] input_old Pointer to the input old tensor. Data type supported: U8
+ * @param[in] input_new Pointer to the input new tensor. Data type supported: U8
+ * @param[in] old_scharr_gx Pointer to the input Scharr X tensor. Data type supported: S16
+ * @param[in] old_scharr_gy Pointer to the input Scharr Y tensor. Data type supported: S16
+ * @param[in] old_points Pointer to the IKeyPointArray storing old key points
+ * @param[in] new_points_estimates Pointer to the IKeyPointArray storing new estimated key points
+ * @param[out] new_points Pointer to the IKeyPointArray storing new key points
+ * @param[in, out] old_points_internal Pointer to the array of NELKInternalKeypoint for old points
+ * @param[out] new_points_internal Pointer to the array of NELKInternalKeypoint for new points
+ * @param[in] termination The criteria to terminate the search of each keypoint.
+ * @param[in] use_initial_estimate The flag to indicate whether the initial estimated position should be used
+ * @param[in] epsilon The error for terminating the algorithm
+ * @param[in] num_iterations The maximum number of iterations before terminating the algorithm
+ * @param[in] window_dimension The size of the window on which to perform the algorithm
+ * @param[in] level The pyramid level
+ * @param[in] num_levels The number of pyramid levels
+ * @param[in] pyramid_scale Scale factor used for generating the pyramid
+ */
+ void configure(const ITensor *input_old, const ITensor *input_new, const ITensor *old_scharr_gx, const ITensor *old_scharr_gy,
+ const IKeyPointArray *old_points, const IKeyPointArray *new_points_estimates, IKeyPointArray *new_points,
+ INELKInternalKeypointArray *old_points_internal, INELKInternalKeypointArray *new_points_internal,
+ Termination termination, bool use_initial_estimate, float epsilon, unsigned int num_iterations, size_t window_dimension,
+ size_t level, size_t num_levels, float pyramid_scale);
+
+ // Inherited methods overridden:
+ void run(const Window &window) override;
+ BorderSize border_size() const override;
+
+private:
+ /** Initialise the array of keypoints in the provided range
+ *
+ * @param[in] start Index of first element in the keypoints array to be initialised
+ * @param[in] end Index after last element in the keypoints array to be initialised
+ */
+ void init_keypoints(int start, int end);
+ /** Compute the structure tensor A^T * A based on the Scharr gradients I_x and I_y
+ *
+ * @param[in] keypoint Keypoint for which gradients are computed
+ * @param[out] bilinear_ix Intermediate interpolated data for X gradient
+ * @param[out] bilinear_iy Intermediate interpolated data for Y gradient
+ *
+ * @return Values A11, A12, A22
+ */
+ std::tuple<int, int, int> compute_spatial_gradient_matrix(const NELKInternalKeypoint &keypoint, int *bilinear_ix, int *bilinear_iy);
+ /** Compute the vector A^T * b, i.e. -sum(I_d * I_t) for d in {x,y}
+ *
+ * @param[in] old_keypoint Old keypoint for which gradient is computed
+ * @param[in] new_keypoint New keypoint for which gradient is computed
+ * @param[in] bilinear_ix Intermediate interpolated data for X gradient
+ * @param[in] bilinear_iy Intermediate interpolated data for Y gradient
+ *
+ * @return Values b1, b2
+ */
+ std::pair<int, int> compute_image_mismatch_vector(const NELKInternalKeypoint &old_keypoint, const NELKInternalKeypoint &new_keypoint, const int *bilinear_ix, const int *bilinear_iy);
+
+ const ITensor *_input_old;
+ const ITensor *_input_new;
+ const ITensor *_old_scharr_gx;
+ const ITensor *_old_scharr_gy;
+ IKeyPointArray *_new_points;
+ const IKeyPointArray *_new_points_estimates;
+ const IKeyPointArray *_old_points;
+ INELKInternalKeypointArray *_old_points_internal;
+ INELKInternalKeypointArray *_new_points_internal;
+ Termination _termination;
+ bool _use_initial_estimate;
+ float _pyramid_scale;
+ float _epsilon;
+ unsigned int _num_iterations;
+ int _window_dimension;
+ unsigned int _level;
+ unsigned int _num_levels;
+ ValidRegion _valid_region;
+};
+}
+#endif /*__ARM_COMPUTE_NELKTRACKERKERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h b/arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h
new file mode 100644
index 0000000000..d4bff661f9
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
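For context (an illustrative sketch, not kernel code): given the structure tensor values A11, A12, A22 and the mismatch vector (b1, b2) returned by the two helpers documented above, each Lucas-Kanade iteration solves a 2x2 linear system for the displacement update.

```cpp
#include <utility>

std::pair<float, float> lk_displacement(float A11, float A12, float A22, float b1, float b2)
{
    const float det = A11 * A22 - A12 * A12;
    if(det == 0.f)
    {
        return { 0.f, 0.f }; // singular system: point is untrackable in this sketch
    }
    // d = A^-1 * b by Cramer's rule
    return { (A22 * b1 - A12 * b2) / det, (A11 * b2 - A12 * b1) / det };
}
```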
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NELOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H__
+#define __ARM_COMPUTE_NELOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** NEON kernel to multiply each row of the first tensor with the low 2 dimensions of the second tensor. */
+class NELocallyConnectedMatrixMultiplyKernel : public INEKernel
+{
+public:
+ /** Default constructor */
+ NELocallyConnectedMatrixMultiplyKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NELocallyConnectedMatrixMultiplyKernel(const NELocallyConnectedMatrixMultiplyKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NELocallyConnectedMatrixMultiplyKernel &operator=(const NELocallyConnectedMatrixMultiplyKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NELocallyConnectedMatrixMultiplyKernel(NELocallyConnectedMatrixMultiplyKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NELocallyConnectedMatrixMultiplyKernel &operator=(NELocallyConnectedMatrixMultiplyKernel &&) = default;
+ /** Initialise the kernel's input and output
+ *
+ * @param[in] input0 First input tensor. Data types supported: F32
+ * @param[in] input1 Second input tensor containing the Matrix B. Data type supported: same as @p input0
+ * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0
+ */
+ void configure(const ITensor *input0, const ITensor *input1, ITensor *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window) override;
+
+private:
+ const ITensor *_input0;
+ const ITensor *_input1;
+ ITensor *_output;
+};
+}
+#endif /* __ARM_COMPUTE_NELOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h b/arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h
new file mode 100644
index 0000000000..5d49901dd0
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
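A scalar sketch of the documented semantics (shapes and memory layout are assumptions): output row r is the product of row r of input0 with the r-th 2D slice of input1.

```cpp
// out[r][c] = sum_k in0[r][k] * in1[r][k][c]  (hypothetical row-major layout)
void locally_connected_matmul(const float *in0, const float *in1, float *out,
                              int rows, int K, int cols)
{
    for(int r = 0; r < rows; ++r)
    {
        for(int c = 0; c < cols; ++c)
        {
            float acc = 0.f;
            for(int k = 0; k < K; ++k)
            {
                acc += in0[r * K + k] * in1[(r * K + k) * cols + c];
            }
            out[r * cols + c] = acc;
        }
    }
}
```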
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEMAGNITUDEPHASEKERNEL_H__
+#define __ARM_COMPUTE_NEMAGNITUDEPHASEKERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Template interface for the kernel to compute magnitude and phase */
+template <MagnitudeType mag_type, PhaseType phase_type>
+class NEMagnitudePhaseKernel : public INEKernel
+{
+public:
+ /** Default constructor */
+ NEMagnitudePhaseKernel();
+ /** Destructor */
+ ~NEMagnitudePhaseKernel() = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEMagnitudePhaseKernel(const NEMagnitudePhaseKernel &) = delete;
+ /** Default move constructor */
+ NEMagnitudePhaseKernel(NEMagnitudePhaseKernel &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEMagnitudePhaseKernel &operator=(const NEMagnitudePhaseKernel &) = delete;
+ /** Default move assignment operator */
+ NEMagnitudePhaseKernel &operator=(NEMagnitudePhaseKernel &&) = default;
+
+ /** Initialise the kernel's input, output.
+ *
+ * @note At least one of magnitude or phase must be set
+ *
+ * @param[in] gx Gradient X tensor. Data type supported: S16.
+ * @param[in] gy Gradient Y tensor. Data type supported: S16.
+ * @param[out] magnitude (Optional) The output tensor - Magnitude. Data type supported: S16.
+ * @param[out] phase (Optional) The output tensor - Phase. Data type supported: U8.
+ */
+ void configure(const ITensor *gx, const ITensor *gy, ITensor *magnitude, ITensor *phase);
+
+ // Inherited methods overridden:
+ void run(const Window &window) override;
+
+private:
+ /** Function to perform magnitude on the given window
+ *
+ * @param[in] window Region on which to execute the kernel
+ */
+ void magnitude(const Window &window);
+ /** Function to perform phase on the given window
+ *
+ * @param[in] window Region on which to execute the kernel
+ */
+ void phase(const Window &window);
+ /** Function to perform magnitude and phase on the given window
+ *
+ * @param[in] window Region on which to execute the kernel
+ */
+ void magnitude_phase(const Window &window);
+
+private:
+ /** Common signature for all the specialised MagnitudePhase functions
+ *
+ * @param[in] window Region on which to execute the kernel.
+ */
+ using MagnitudePhaseFunctionPtr = void (NEMagnitudePhaseKernel::*)(const Window &window);
+ /** MagnitudePhase function to use for the particular formats passed to configure() */
+ MagnitudePhaseFunctionPtr _func;
+ const ITensor *_gx; /**< Input gradient X */
+ const ITensor *_gy; /**< Input gradient Y */
+ ITensor *_magnitude; /**< Output - Magnitude */
+ ITensor *_phase; /**< Output - Phase */
+};
+
+#ifdef ARM_COMPUTE_ENABLE_FP16
+/** Template interface for the kernel to compute magnitude and phase */
+template <MagnitudeType mag_type, PhaseType phase_type>
+class NEMagnitudePhaseFP16Kernel : public INEKernel
+{
+public:
+ /** Default constructor */
+ NEMagnitudePhaseFP16Kernel();
+ /** Destructor */
+ ~NEMagnitudePhaseFP16Kernel() = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEMagnitudePhaseFP16Kernel(const NEMagnitudePhaseFP16Kernel &) = delete;
+ /** Default move constructor */
+ NEMagnitudePhaseFP16Kernel(NEMagnitudePhaseFP16Kernel &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEMagnitudePhaseFP16Kernel &operator=(const NEMagnitudePhaseFP16Kernel &) = delete;
+ /** Default move assignment operator */
+ NEMagnitudePhaseFP16Kernel &operator=(NEMagnitudePhaseFP16Kernel &&) = default;
+
+ /** Initialise the kernel's input, output.
+ *
+ * @note At least one of magnitude or phase must be set
+ *
+ * @param[in] gx Gradient X tensor. Data type supported: S16.
+ * @param[in] gy Gradient Y tensor. Data type supported: S16.
+ * @param[out] magnitude (Optional) The output tensor - Magnitude. Data type supported: S16.
+ * @param[out] phase (Optional) The output tensor - Phase. Data type supported: U8.
+ */
+ void configure(const ITensor *gx, const ITensor *gy, ITensor *magnitude, ITensor *phase);
+
+ // Inherited methods overridden:
+ void run(const Window &window) override;
+
+private:
+ /** Function to perform magnitude on the given window
+ *
+ * @param[in] window Region on which to execute the kernel
+ */
+ void magnitude(const Window &window);
+ /** Function to perform phase on the given window
+ *
+ * @param[in] window Region on which to execute the kernel
+ */
+ void phase(const Window &window);
+ /** Function to perform magnitude and phase on the given window
+ *
+ * @param[in] window Region on which to execute the kernel
+ */
+ void magnitude_phase(const Window &window);
+
+ /** Common signature for all the specialised MagnitudePhase functions
+ *
+ * @param[in] window Region on which to execute the kernel.
+ */
+ using MagnitudePhaseFunctionPtr = void (NEMagnitudePhaseFP16Kernel::*)(const Window &window);
+ /** MagnitudePhase function to use for the particular formats passed to configure() */
+ MagnitudePhaseFunctionPtr _func;
+ const ITensor *_gx; /**< Input gradient X */
+ const ITensor *_gy; /**< Input gradient Y */
+ ITensor *_magnitude; /**< Output - Magnitude */
+ ITensor *_phase; /**< Output - Phase */
+};
+#else
+template <MagnitudeType mag_type, PhaseType phase_type>
+using NEMagnitudePhaseFP16Kernel = NEMagnitudePhaseKernel<mag_type, phase_type>;
+#endif
+}
+#endif /* __ARM_COMPUTE_NEMAGNITUDEPHASEKERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h b/arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h
new file mode 100644
index 0000000000..83407ccb7d
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
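The scalar definitions behind the kernel, for reference (illustrative; the NEON paths may round and quantise differently): L2 magnitude of the gradients and the gradient angle mapped onto U8.

```cpp
#include <cmath>
#include <cstdint>

int16_t magnitude_l2(int16_t gx, int16_t gy)
{
    return static_cast<int16_t>(std::lround(std::sqrt(static_cast<float>(gx) * gx + static_cast<float>(gy) * gy)));
}

uint8_t phase_u8(int16_t gx, int16_t gy)
{
    float angle = std::atan2(static_cast<float>(gy), static_cast<float>(gx)) * 180.f / 3.14159265f;
    if(angle < 0.f)
    {
        angle += 360.f; // map to [0, 360)
    }
    return static_cast<uint8_t>(angle * 255.f / 360.f); // quantisation is an assumption
}
```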
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEMEANSTDDEVKERNEL_H__
+#define __ARM_COMPUTE_NEMEANSTDDEVKERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+
+#include <cstdint>
+#include <mutex>
+
+namespace arm_compute
+{
+class ITensor;
+using IImage = ITensor;
+
+/** Interface for the kernel to calculate mean and standard deviation of input image pixels. */
+class NEMeanStdDevKernel : public INEKernel
+{
+public:
+ /** Default constructor */
+ NEMeanStdDevKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEMeanStdDevKernel(const NEMeanStdDevKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEMeanStdDevKernel &operator=(const NEMeanStdDevKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEMeanStdDevKernel(NEMeanStdDevKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEMeanStdDevKernel &operator=(NEMeanStdDevKernel &&) = default;
+ /** Default destructor */
+ ~NEMeanStdDevKernel() = default;
+
+ /** Initialise the kernel's input and outputs.
+ *
+ * @param[in] input Input image. Data type supported: U8.
+ * @param[out] mean Output average pixel value.
+ * @param[out] global_sum Keeps global sum of pixel values.
+ * @param[out] stddev (Optional) Output standard deviation of pixel values.
+ * @param[out] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values.
+ */
+ void configure(const IImage *input, float *mean, uint64_t *global_sum, float *stddev = nullptr, uint64_t *global_sum_squared = nullptr);
+
+ // Inherited methods overridden:
+ void run(const Window &window) override;
+
+private:
+ const IImage *_input;
+ float *_mean;
+ float *_stddev;
+ uint64_t *_global_sum;
+ uint64_t *_global_sum_squared;
+ std::mutex _mtx;
+};
+}
+#endif /* __ARM_COMPUTE_NEMEANSTDDEVKERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h b/arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h
new file mode 100644
index 0000000000..dee1aadfb9
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
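The final reduction implied by the parameters above (illustrative): each thread accumulates into global_sum and global_sum_squared under the mutex; mean and standard deviation then follow from the usual identities.

```cpp
#include <cmath>
#include <cstdint>

void finalise_mean_stddev(uint64_t global_sum, uint64_t global_sum_squared,
                          uint64_t num_pixels, float *mean, float *stddev)
{
    *mean = static_cast<float>(global_sum) / num_pixels;
    if(stddev != nullptr)
    {
        const float mean_of_squares = static_cast<float>(global_sum_squared) / num_pixels;
        *stddev = std::sqrt(mean_of_squares - (*mean) * (*mean)); // Var(X) = E[X^2] - E[X]^2
    }
}
```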
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEMEDIAN3x3KERNEL_H__ +#define __ARM_COMPUTE_NEMEDIAN3x3KERNEL_H__ + +#include "arm_compute/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Kernel to perform a median filter on a tensor */ +class NEMedian3x3Kernel : public INESimpleKernel +{ +public: + /** Set the source, destination and border mode of the kernel + * + * @param[in] input Source tensor. Data type supported: U8 + * @param[out] output Destination tensor. Data type supported: U8 + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ITensor *input, ITensor *output, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; +}; +} +#endif /*__ARM_COMPUTE_NEMEDIAN3x3KERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h b/arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h new file mode 100644 index 0000000000..e405ea5ae4 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h @@ -0,0 +1,161 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
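Not part of the patch: a minimal usage sketch for the median kernel, assuming the usual Tensor allocation flow and the runtime scheduler; names outside this header come from the library's runtime API.

```cpp
#include "arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void median_sketch()
{
    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(640U, 480U), Format::U8));
    dst.allocator()->init(TensorInfo(TensorShape(640U, 480U), Format::U8));
    src.allocator()->allocate();
    dst.allocator()->allocate();
    // ... fill src ...

    NEMedian3x3Kernel median;
    median.configure(&src, &dst, true /* border_undefined */);
    NEScheduler::get().schedule(&median, Window::DimY); // split the work across rows
}
```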
+ */
+#ifndef __ARM_COMPUTE_NEMINMAXLOCATIONKERNEL_H__
+#define __ARM_COMPUTE_NEMINMAXLOCATIONKERNEL_H__
+
+#include "arm_compute/core/IArray.h"
+#include "arm_compute/core/NEON/INEKernel.h"
+
+#include <cstdint>
+#include <mutex>
+
+namespace arm_compute
+{
+class ITensor;
+using IImage = ITensor;
+
+/** Interface for the kernel to perform min max search on an image. */
+class NEMinMaxKernel : public INEKernel
+{
+public:
+ /** Default constructor */
+ NEMinMaxKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEMinMaxKernel(const NEMinMaxKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEMinMaxKernel &operator=(const NEMinMaxKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEMinMaxKernel(NEMinMaxKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEMinMaxKernel &operator=(NEMinMaxKernel &&) = default;
+ /** Default destructor */
+ ~NEMinMaxKernel() = default;
+
+ /** Initialise the kernel's input and outputs.
+ *
+ * @param[in] input Input image. Data types supported: U8/S16.
+ * @param[out] min Minimum value of image.
+ * @param[out] max Maximum value of image.
+ */
+ void configure(const IImage *input, int32_t *min, int32_t *max);
+ /** Resets global minimum and maximum. */
+ void reset();
+
+ // Inherited methods overridden:
+ void run(const Window &window) override;
+
+private:
+ /** Performs the min/max algorithm on U8 images on a given window.
+ *
+ * @param win The window to run the algorithm on.
+ */
+ void minmax_U8(const Window &win);
+ /** Performs the min/max algorithm on S16 images on a given window.
+ *
+ * @param win The window to run the algorithm on.
+ */
+ void minmax_S16(const Window &win);
+ /** Common signature for all the specialised MinMax functions
+ *
+ * @param[in] window Region on which to execute the kernel.
+ */
+ using MinMaxFunction = void (NEMinMaxKernel::*)(const Window &window);
+ /** MinMax function to use for the particular image types passed to configure() */
+ MinMaxFunction _func;
+ /** Helper to update min/max values */
+ template <typename T>
+ void update_min_max(T min, T max);
+
+ const IImage *_input; /**< Input image. */
+ int32_t *_min; /**< Minimum value. */
+ int32_t *_max; /**< Maximum value. */
+ int32_t _min_init; /**< Value to initialise global minimum value. */
+ int32_t _max_init; /**< Value to initialise global maximum value. */
+ std::mutex _mtx; /**< Mutex used for result reduction. */
+};
+
+/** Interface for the kernel to find min max locations of an image. */
+class NEMinMaxLocationKernel : public INEKernel
+{
+public:
+ /** Default constructor */
+ NEMinMaxLocationKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEMinMaxLocationKernel(const NEMinMaxLocationKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEMinMaxLocationKernel &operator=(const NEMinMaxLocationKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NEMinMaxLocationKernel(NEMinMaxLocationKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NEMinMaxLocationKernel &operator=(NEMinMaxLocationKernel &&) = default;
+ /** Default destructor */
+ ~NEMinMaxLocationKernel() = default;
+
+ /** Initialise the kernel's input and outputs.
+ *
+ * @param[in] input Input image. Data types supported: U8/S16.
+ * @param[out] min Minimum value of image.
+ * @param[out] max Maximum value of image.
+ * @param[out] min_loc Array of minimum value locations.
+ * @param[out] max_loc Array of maximum value locations.
+ * @param[out] min_count Number of minimum value encounters.
+ * @param[out] max_count Number of maximum value encounters.
+ */
+ void configure(const IImage *input, int32_t *min, int32_t *max,
+ ICoordinates2DArray *min_loc = nullptr, ICoordinates2DArray *max_loc = nullptr,
+ uint32_t *min_count = nullptr, uint32_t *max_count = nullptr);
+
+ // Inherited methods overridden:
+ void run(const Window &window) override;
+ bool is_parallelisable() const override;
+
+private:
+ /** Performs the min/max location algorithm on T type images on a given window.
+ *
+ * @param win The window to run the algorithm on.
+ */
+ template <class T, bool count_min, bool count_max, bool loc_min, bool loc_max>
+ void minmax_loc(const Window &win);
+ /** Common signature for all the specialised MinMaxLoc functions
+ *
+ * @param[in] window Region on which to execute the kernel.
+ */
+ using MinMaxLocFunction = void (NEMinMaxLocationKernel::*)(const Window &window);
+ /** MinMaxLoc function to use for the particular image types passed to configure() */
+ MinMaxLocFunction _func;
+ /** Helper to create a function pointer table for the parameterized MinMaxLocation functions. */
+ template <class T>
+ struct create_func_table;
+
+ const IImage *_input; /**< Input image. */
+ int32_t *_min; /**< Minimum value. */
+ int32_t *_max; /**< Maximum value. */
+ uint32_t *_min_count; /**< Count of minimum value encounters. */
+ uint32_t *_max_count; /**< Count of maximum value encounters. */
+ ICoordinates2DArray *_min_loc; /**< Locations of minimum values. */
+ ICoordinates2DArray *_max_loc; /**< Locations of maximum values. */
+ unsigned int _num_elems_processed_per_iteration; /**< Elements processed per iteration. */
+};
+}
+#endif /*__ARM_COMPUTE_NEMINMAXLOCATIONKERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NENonLinearFilterKernel.h b/arm_compute/core/NEON/kernels/NENonLinearFilterKernel.h
new file mode 100644
index 0000000000..ede0294a73
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NENonLinearFilterKernel.h
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
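Not from the patch: a sketch of how the two kernels above are meant to be chained; NEMinMaxKernel finds the values, NEMinMaxLocationKernel then collects their locations into user-provided arrays. The array type and capacities are assumptions based on the runtime Array helpers.

```cpp
#include "arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h"
#include "arm_compute/runtime/Array.h"

using namespace arm_compute;

void minmax_location_sketch(const IImage *input)
{
    int32_t  min = 0, max = 0;
    uint32_t min_count = 0, max_count = 0;
    Coordinates2DArray min_loc(1000), max_loc(1000); // capacities are illustrative

    NEMinMaxKernel minmax;
    minmax.configure(input, &min, &max);

    NEMinMaxLocationKernel minmax_loc;
    minmax_loc.configure(input, &min, &max, &min_loc, &max_loc, &min_count, &max_count);
    // Run minmax first (e.g. through NEScheduler), then minmax_loc.
}
```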
+ */
+#ifndef __ARM_COMPUTE_NENONLINEARFILTERKERNEL_H__
+#define __ARM_COMPUTE_NENONLINEARFILTERKERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+#include "arm_compute/core/Types.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the kernel to apply a non-linear filter */
+class NENonLinearFilterKernel : public INEKernel
+{
+public:
+ /** Default constructor */
+ NENonLinearFilterKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NENonLinearFilterKernel(NENonLinearFilterKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NENonLinearFilterKernel &operator=(NENonLinearFilterKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NENonLinearFilterKernel(NENonLinearFilterKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NENonLinearFilterKernel &operator=(NENonLinearFilterKernel &&) = default;
+ /** Set the source, destination and border mode of the kernel
+ *
+ * @param[in] input Source tensor. Data type supported: U8
+ * @param[out] output Destination tensor. Data type supported: U8
+ * @param[in] function Non linear function to perform
+ * @param[in] mask_size Mask size. Supported sizes: 3, 5
+ * @param[in] pattern Mask pattern
+ * @param[in] mask The given mask. Will be used only if pattern is set to PATTERN_OTHER
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ITensor *input, ITensor *output, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, bool border_undefined);
+
+ // Inherited methods overridden:
+ void run(const Window &window) override;
+ BorderSize border_size() const override;
+
+private:
+ /** Fill mask with the corresponding given pattern.
+ *
+ * @param[in,out] mask Mask to be filled according to pattern
+ * @param[in] cols Columns (width) of mask
+ * @param[in] rows Rows (height) of mask
+ * @param[in] pattern Pattern to fill the mask according to
+ */
+ void fill_mask(uint8_t *mask, int cols, int rows, MatrixPattern pattern);
+ /** Apply a median filter when the given mask pattern is defined as box.
+ *
+ * @param[in] win Window to apply the filter on.
+ */
+ template <int mask_w, int mask_h>
+ void median_filter_box(const Window &win);
+ /** Apply a min filter when the given mask pattern is defined as box.
+ *
+ * @param[in] win Window to apply the filter on.
+ */
+ template <int mask_w, int mask_h>
+ void min_filter_box(const Window &win);
+ /** Apply a max filter when the given mask pattern is defined as box.
+ *
+ * @param[in] win Window to apply the filter on.
+ */
+ template <int mask_w, int mask_h>
+ void max_filter_box(const Window &win);
+ /** Apply a median filter when the given mask pattern is defined as cross.
+ *
+ * @param[in] win Window to apply the filter on.
+ */
+ template <int mask_w, int mask_h>
+ void median_filter_cross(const Window &win);
+ /** Apply a min filter when the given mask pattern is defined as cross.
+ *
+ * @param[in] win Window to apply the filter on.
+ */
+ template <int mask_w, int mask_h>
+ void min_filter_cross(const Window &win);
+ /** Apply a max filter when the given mask pattern is defined as cross.
+ *
+ * @param[in] win Window to apply the filter on.
+ */
+ template <int mask_w, int mask_h>
+ void max_filter_cross(const Window &win);
+ /** Apply a median filter when the given mask pattern is defined as disk.
+ *
+ * @param[in] win Window to apply the filter on.
+ */
+ template <int mask_w, int mask_h>
+ void median_filter_disk(const Window &win);
+ /** Apply a min filter when the given mask pattern is defined as disk.
+ *
+ * @param[in] win Window to apply the filter on.
+ */
+ template <int mask_w, int mask_h>
+ void min_filter_disk(const Window &win);
+ /** Apply a max filter when the given mask pattern is defined as disk.
+ *
+ * @param[in] win Window to apply the filter on.
+ */
+ template <int mask_w, int mask_h>
+ void max_filter_disk(const Window &win);
+ /** Apply a non-linear filter when the given mask has a user-defined pattern.
+ *
+ * @param[in] win Window to apply the filter on.
+ */
+ template <int mask_w, int mask_h>
+ void non_linear_filter_generic(const Window &win);
+
+private:
+ unsigned int _border_width;
+ const ITensor *_input;
+ ITensor *_output;
+ const uint8_t *_mask;
+ MatrixPattern _pattern;
+ NonLinearFilterFunction _function;
+ unsigned int _func_idx;
+ BorderSize _border_size;
+};
+}
+#endif /*__ARM_COMPUTE_NENONLINEARFILTERKERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h b/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h
new file mode 100644
index 0000000000..0daae59e54
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
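For intuition, an illustrative 5x5 cross mask of the kind fill_mask() produces for the CROSS pattern; the 255/0 encoding of participating pixels is an assumption of this sketch.

```cpp
#include <cstdint>

const uint8_t cross_mask_5x5[5 * 5] = {
    0,   0,   255, 0,   0,
    0,   0,   255, 0,   0,
    255, 255, 255, 255, 255,
    0,   0,   255, 0,   0,
    0,   0,   255, 0,   0,
};
```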
+ */
+#ifndef __ARM_COMPUTE_NENONMAXIMASUPPRESSION3x3KERNEL_H__
+#define __ARM_COMPUTE_NENONMAXIMASUPPRESSION3x3KERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface to perform Non-Maxima suppression over a 3x3 window using NEON
+ *
+ * @note Used by @ref NEFastCorners and @ref NEHarrisCorners
+ */
+class NENonMaximaSuppression3x3Kernel : public INEKernel
+{
+public:
+ /** Default constructor */
+ NENonMaximaSuppression3x3Kernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NENonMaximaSuppression3x3Kernel(const NENonMaximaSuppression3x3Kernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NENonMaximaSuppression3x3Kernel &operator=(const NENonMaximaSuppression3x3Kernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NENonMaximaSuppression3x3Kernel(NENonMaximaSuppression3x3Kernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NENonMaximaSuppression3x3Kernel &operator=(NENonMaximaSuppression3x3Kernel &&) = default;
+ /** Default destructor */
+ ~NENonMaximaSuppression3x3Kernel() = default;
+
+ /** Initialise the kernel's sources, destinations and border mode.
+ *
+ * @param[in] input Source tensor. Data types supported: U8/F32
+ * @param[out] output Destination tensor. Data types supported: same as @p input
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ITensor *input, ITensor *output, bool border_undefined);
+
+ // Inherited methods overridden:
+ void run(const Window &window) override;
+ BorderSize border_size() const override;
+
+protected:
+ /** Common signature for all the specialised non-maxima suppression 3x3 functions
+ *
+ * @param[in] input_ptr Pointer to the input tensor.
+ * @param[out] output_ptr Pointer to the output tensor
+ * @param[in] input_stride Stride of the input tensor
+ */
+ using NonMaxSuppr3x3Function = void(const void *__restrict input_ptr, void *__restrict output_ptr, const uint32_t input_stride);
+
+ NonMaxSuppr3x3Function *_func; /**< Non-Maxima suppression function to use for the particular tensor types passed to configure() */
+ const ITensor *_input; /**< Source tensor */
+ ITensor *_output; /**< Destination tensor */
+};
+
+#ifdef ARM_COMPUTE_ENABLE_FP16
+/** NEON kernel to perform Non-Maxima suppression 3x3 with intermediate results in F16 if the input data type is F32
+ */
+class NENonMaximaSuppression3x3FP16Kernel : public NENonMaximaSuppression3x3Kernel
+{
+public:
+ /** Initialise the kernel's sources, destinations and border mode.
+ *
+ * @param[in] input Source tensor. Data types supported: U8/F32.
+ * @param[out] output Destination tensor. Data types supported: same as @p input
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ITensor *input, ITensor *output, bool border_undefined);
+};
+#else
+using NENonMaximaSuppression3x3FP16Kernel = NENonMaximaSuppression3x3Kernel;
+#endif
+}
+#endif /* __ARM_COMPUTE_NENONMAXIMASUPPRESSION3x3KERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h b/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h
new file mode 100644
index 0000000000..d4e36d5ff1
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
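A scalar reference of the suppression rule (illustrative; tie handling in the NEON code may differ): a pixel survives only if no 8-neighbour exceeds it.

```cpp
float nms3x3_at(const float *in, int stride, int x, int y)
{
    const float centre = in[y * stride + x];
    for(int dy = -1; dy <= 1; ++dy)
    {
        for(int dx = -1; dx <= 1; ++dx)
        {
            if((dx != 0 || dy != 0) && in[(y + dy) * stride + (x + dx)] > centre)
            {
                return 0.f; // suppressed
            }
        }
    }
    return centre;
}
```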
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NENORMALIZATIONLAYERKERNEL_H__
+#define __ARM_COMPUTE_NENORMALIZATIONLAYERKERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the normalization layer kernel.
+ */
+class NENormalizationLayerKernel : public INEKernel
+{
+public:
+ /** Default constructor */
+ NENormalizationLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NENormalizationLayerKernel(const NENormalizationLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NENormalizationLayerKernel &operator=(const NENormalizationLayerKernel &) = delete;
+ /** Default move constructor. */
+ NENormalizationLayerKernel(NENormalizationLayerKernel &&) = default;
+ /** Default move assignment operator. */
+ NENormalizationLayerKernel &operator=(NENormalizationLayerKernel &&) = default;
+ /** Default destructor */
+ ~NENormalizationLayerKernel() = default;
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
+ * and an optional 4th dimension for a batch of inputs. Data types supported: QS8/F32.
+ * @param[in] input_squared Source with each element squared. 3 lower dims represent a single input with dimensions [width, height, IFM],
+ * Data type supported: same as @p input
+ * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input
+ * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters.
+ */
+ void configure(const ITensor *input, const ITensor *input_squared, ITensor *output, NormalizationLayerInfo norm_info);
+
+ // Inherited methods overridden:
+ void run(const Window &window) override;
+ BorderSize border_size() const override;
+
+private:
+ /** Function to perform normalization depending on the given template
+ * dimension. The second template parameter specifies whether the
+ * normalization has to be 1D or 2D.
+ *
+ * @note Only supported normalizations are:
+ * - 1D over X or Z
+ * - 2D over X and Y
+ *
+ * @param[in] window Region on which to execute the kernel.
+ */
+ template <unsigned int dim, bool do_2D_norm>
+ void normalize(const Window &window);
+
+ /** Function to perform normalization for fixed-point values depending on
+ * the given template dimension. The second template parameter specifies
+ * whether the normalization has to be 1D or 2D.
+ *
+ * @note Only supported normalizations are:
+ * - 1D over X or Z
+ * - 2D over X and Y
+ *
+ * @param[in] window Region on which to execute the kernel.
+ */
+ template <unsigned int dim, bool do_2D_norm>
+ void normalize_fixed_point(const Window &window);
+ /** Common signature for all the specialised normalization functions
+ *
+ * @param[in] window Region on which to execute the kernel.
+ */
+ using NormalizationFunction = void (NENormalizationLayerKernel::*)(const Window &window);
+
+private:
+ NormalizationFunction _func;
+ const ITensor *_input;
+ const ITensor *_input_squared;
+ ITensor *_output;
+ NormalizationLayerInfo _norm_info;
+ BorderSize _border_size;
+};
+}
+#endif /*__ARM_COMPUTE_NENORMALIZATIONLAYERKERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h b/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h
new file mode 100644
index 0000000000..7e402cd220
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
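For reference, the scalar form of the normalization this kernel performs follows the standard local response normalization definition (the exact coefficient handling comes from NormalizationLayerInfo and is assumed here):

```cpp
#include <cmath>

// out = in / (kappa + (alpha / n) * sum_sq)^beta, with sum_sq the sum of
// squares over the n-element window (1D or 2D, per the template above).
float normalize_one(float in, float window_sum_of_squares, int n,
                    float kappa, float alpha, float beta)
{
    return in / std::pow(kappa + (alpha / n) * window_sum_of_squares, beta);
}
```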
+ */
+#ifndef __ARM_COMPUTE_NEPIXELWISEMULTIPLICATIONKERNEL_H__
+#define __ARM_COMPUTE_NEPIXELWISEMULTIPLICATIONKERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the kernel to perform multiplication between two tensors */
+class NEPixelWiseMultiplicationKernel : public INEKernel
+{
+public:
+    /** Default constructor */
+    NEPixelWiseMultiplicationKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEPixelWiseMultiplicationKernel(const NEPixelWiseMultiplicationKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEPixelWiseMultiplicationKernel &operator=(const NEPixelWiseMultiplicationKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    NEPixelWiseMultiplicationKernel(NEPixelWiseMultiplicationKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    NEPixelWiseMultiplicationKernel &operator=(NEPixelWiseMultiplicationKernel &&) = default;
+    /** Default destructor */
+    ~NEPixelWiseMultiplicationKernel() = default;
+    /** Initialise the kernel's input, output and border mode.
+     *
+     * @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported.
+     *       For all other scale values only round to zero (implemented as round towards minus infinity) is supported.
+     *
+     * @param[in]  input1          An input tensor. Data types supported: U8/QS8/S16/F32.
+     * @param[in]  input2          An input tensor. Data types supported: U8/QS8/S16/F32.
+     * @param[out] output          The output tensor. Data types supported: U8 (only if both inputs are U8)/S16/F32.
+     * @param[in]  scale           Scale to apply after multiplication.
+     *                             Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
+     * @param[in]  overflow_policy Overflow policy.
+     * @param[in]  rounding_policy Rounding policy.
+     */
+    void configure(const ITensor *input1, const ITensor *input2, ITensor *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+
+private:
+    /** Common signature for all the specialised multiplication functions with integer scaling factor
+     *
+     * @param[in]  input1_ptr Pointer to the first input tensor.
+     * @param[in]  input2_ptr Pointer to the second input tensor.
+     * @param[in]  scale      Scaling factor.
+     * @param[out] output_ptr Pointer to the output tensor.
+     */
+    using MulFunctionInt = void(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, int scale);
+    /** Common signature for all the specialised multiplication functions with fixed-point values
+     *
+     * @param[in]  input1_ptr           Pointer to the first input tensor.
+     * @param[in]  input2_ptr           Pointer to the second input tensor.
+     * @param[in]  scale                Scaling factor.
+     * @param[in]  fixed_point_position Fixed-point position that expresses the number of bits for the fractional part of the number.
+     * @param[out] output_ptr           Pointer to the output tensor.
+     */
+    using MulFunctionQInt = void(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, int scale, int fixed_point_position);
+    /** Common signature for all the specialised multiplication functions with float scaling factor
+     *
+     * @param[in]  input1_ptr Pointer to the first input tensor.
+     * @param[in]  input2_ptr Pointer to the second input tensor.
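+     * @param[in]  scale      Scaling factor.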
+ * @param[out] output_ptr Pointer to the output tensor. + */ + using MulFunctionFloat = void(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, float scale); + + MulFunctionFloat *_func_float; + MulFunctionInt *_func_int; + MulFunctionQInt *_func_q_int; + +private: + const ITensor *_input1; + const ITensor *_input2; + ITensor *_output; + float _scale; + int _scale_exponent; +}; +} +#endif /*__ARM_COMPUTE_NEPIXELWISEMULTIPLICATIONKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h new file mode 100644 index 0000000000..62a087841a --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEPOOLINGLAYERKERNEL_H__ +#define __ARM_COMPUTE_NEPOOLINGLAYERKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the pooling layer kernel */ +class NEPoolingLayerKernel : public INEKernel +{ +public: + /** Default constructor */ + NEPoolingLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPoolingLayerKernel(const NEPoolingLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPoolingLayerKernel &operator=(const NEPoolingLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + NEPoolingLayerKernel(NEPoolingLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + NEPoolingLayerKernel &operator=(NEPoolingLayerKernel &&) = default; + /** Default destructor */ + ~NEPoolingLayerKernel() = default; + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: QS8/F32. + * @param[out] output Destination tensor. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. + */ + void configure(const ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + /** Function to perform 2x2 pooling. + * + * @param[in] window_input Input region on which to execute the kernel. 
+     * @param[in] window       Output region on which to execute the kernel.
+     */
+    template <PoolingType pooling_type>
+    void pooling2_f32(const Window &window_input, const Window &window);
+    /** Function to perform 2x2 pooling for 8-bit fixed point.
+     *
+     * @param[in] window_input Input region on which to execute the kernel.
+     * @param[in] window       Output region on which to execute the kernel.
+     */
+    template <PoolingType pooling_type>
+    void pooling2_q8(const Window &window_input, const Window &window);
+    /** Function to perform 3x3 pooling.
+     *
+     * @param[in] window_input Input region on which to execute the kernel.
+     * @param[in] window       Output region on which to execute the kernel.
+     */
+    template <PoolingType pooling_type>
+    void pooling3_f32(const Window &window_input, const Window &window);
+    /** Function to perform 3x3 pooling for 8-bit fixed point.
+     *
+     * @param[in] window_input Input region on which to execute the kernel.
+     * @param[in] window       Output region on which to execute the kernel.
+     */
+    template <PoolingType pooling_type>
+    void pooling3_q8(const Window &window_input, const Window &window);
+    /** Common signature for all the specialised Pooling functions
+     *
+     * @param[in] window_input Input region on which to execute the kernel.
+     * @param[in] window       Output region on which to execute the kernel.
+     */
+    using PoolingFunction = void (NEPoolingLayerKernel::*)(const Window &window_input, const Window &window);
+
+private:
+    PoolingFunction _func;
+    const ITensor *_input;
+    ITensor *_output;
+    PoolingLayerInfo _pool_info;
+    int _num_elems_processed_per_iteration;
+    BorderSize _border_size;
+};
+}
+#endif /*__ARM_COMPUTE_NEPOOLINGLAYERKERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NERemapKernel.h b/arm_compute/core/NEON/kernels/NERemapKernel.h
new file mode 100644
index 0000000000..f9eae68ee8
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NERemapKernel.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */ +#ifndef __ARM_COMPUTE_NEREMAPKERNEL_H__ +#define __ARM_COMPUTE_NEREMAPKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform a remap on a tensor */ +class NERemapKernel : public INEKernel +{ +public: + /** Default constructor */ + NERemapKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NERemapKernel(const NERemapKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NERemapKernel &operator=(const NERemapKernel &) = delete; + /** Allow instances of this class to be moved */ + NERemapKernel(NERemapKernel &&) = default; + /** Allow instances of this class to be moved */ + NERemapKernel &operator=(NERemapKernel &&) = default; + /** Default destructor */ + ~NERemapKernel() = default; + + /** Initialize the kernel's input, output and border mode. + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[in] map_x Map for X coordinates. Data type supported: F32. + * @param[in] map_y Map for Y coordinates. Data type supported: F32. + * @param[out] output Destination tensor. Data types supported: U8. All but the lowest two dimensions must be the same size as in the input tensor, i.e. remapping is only performed within the XY-plane. + * @param[in] policy The interpolation type. + */ + void configure(const ITensor *input, const ITensor *map_x, const ITensor *map_y, ITensor *output, InterpolationPolicy policy); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + /** function to perform nearest interpolation on the given window */ + void remap_nearest(const Window &window); + /** function to perform bilinear interpolation on the given window */ + void remap_bilinear(const Window &window); + /** Remap function to use for the particular interpolation type passed to configure() */ + void (NERemapKernel::*_func)(const Window &window); + + const ITensor *_input; /**< Input image */ + ITensor *_output; /**< Output image */ + const ITensor *_map_x; /**< Input remap x coordinates */ + const ITensor *_map_y; /**< Input remap y coordinates */ +}; +} +#endif /*__ARM_COMPUTE_NEREMAPKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEScaleKernel.h b/arm_compute/core/NEON/kernels/NEScaleKernel.h new file mode 100644 index 0000000000..03e26520b5 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEScaleKernel.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NESCALEKERNEL_H__
+#define __ARM_COMPUTE_NESCALEKERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** NEON kernel to perform scaling on a tensor */
+class NEScaleKernel : public INEKernel
+{
+public:
+    /** Default constructor */
+    NEScaleKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEScaleKernel(const NEScaleKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEScaleKernel &operator=(const NEScaleKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    NEScaleKernel(NEScaleKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    NEScaleKernel &operator=(NEScaleKernel &&) = default;
+    /** Default destructor */
+    ~NEScaleKernel() = default;
+
+    /** Initialise the kernel's inputs, output and interpolation policy
+     *
+     * @note dx, dy and offsets have the same dimensions (width and height) as the output tensor
+     *
+     * @param[in]  input            Source tensor. Data types supported: U8/S16.
+     * @param[in]  dx               Pixel's distance between the X real coordinate and the smallest X following integer. Data type supported: F32
+     * @param[in]  dy               Pixel's distance between the Y real coordinate and the smallest Y following integer. Data type supported: F32
+     * @param[in]  offsets          Offset to access the pixel with NEAREST interpolation or the top-left pixel with BILINEAR interpolation in the input tensor. Data type supported: S32.
+     * @param[out] output           Destination tensor. Data types supported: U8/S16. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
+     * @param[in]  policy           Interpolation type to use.
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(const ITensor *input, const ITensor *dx, const ITensor *dy, const ITensor *offsets, ITensor *output, InterpolationPolicy policy, bool border_undefined);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+    BorderSize border_size() const override;
+
+private:
+    /** function to perform scale using nearest interpolation on the given window */
+    void scale_nearest(const Window &window);
+    /** function to perform scale using bilinear interpolation on the given window */
+    void scale_bilinear(const Window &window);
+    /** function to perform scale using area interpolation on the given window
+     *
+     * @note Used only in the case of down-sampling.
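+     * @note Area interpolation is assumed here to mean averaging the input
+     *       pixels covered by each output pixel's footprint.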
+ */ + void scale_area(const Window &window); + /** Scale function to use for the particular interpolation type passed to configure() */ + void (NEScaleKernel::*_func)(const Window &window); + + const ITensor *_offsets; + const ITensor *_dx; + const ITensor *_dy; + const ITensor *_input; + ITensor *_output; +}; +} +#endif /*__ARM_COMPUTE_NESCALEKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEScharr3x3Kernel.h b/arm_compute/core/NEON/kernels/NEScharr3x3Kernel.h new file mode 100644 index 0000000000..c618456d49 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEScharr3x3Kernel.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NESCHARR3x3KERNEL_H__ +#define __ARM_COMPUTE_NESCHARR3x3KERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to run a 3x3 Scharr filter on a tensor. + * +* @f[ +* \mathbf{G}_x=\begin{vmatrix} +* -3 & 0 & +3\\ +* -10& 0 & +10\\ +* -3 & 0 & +3 +* \end{vmatrix} +* @f] +*/ +class NEScharr3x3Kernel : public INEKernel +{ +public: + /** Default constructor */ + NEScharr3x3Kernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEScharr3x3Kernel(const NEScharr3x3Kernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEScharr3x3Kernel &operator=(const NEScharr3x3Kernel &) = delete; + /** Allow instances of this class to be moved */ + NEScharr3x3Kernel(NEScharr3x3Kernel &&) = default; + /** Allow instances of this class to be moved */ + NEScharr3x3Kernel &operator=(NEScharr3x3Kernel &&) = default; + /** Default destructor */ + ~NEScharr3x3Kernel() = default; + + /** Initialise the kernel's source, destination and border. + * + * @note At least one of output_x or output_y must be set. + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S16. + * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
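+     *
+     * A minimal configuration sketch (tensor creation and allocation omitted;
+     * the variable names are illustrative only):
+     * @code
+     * NEScharr3x3Kernel scharr;
+     * // compute both gradients, with a defined border mode:
+     * scharr.configure(&src, &grad_x, &grad_y, false);
+     * @endcode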
+     */
+    void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+    BorderSize border_size() const override;
+
+private:
+    bool _run_scharr_x;    /**< Do we need to run Scharr X? */
+    bool _run_scharr_y;    /**< Do we need to run Scharr Y? */
+    const ITensor *_input; /**< Input tensor */
+    ITensor *_output_x;    /**< Output tensor for scharr X */
+    ITensor *_output_y;    /**< Output tensor for scharr Y */
+};
+}
+#endif /*__ARM_COMPUTE_NESCHARR3x3KERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NESobel3x3Kernel.h b/arm_compute/core/NEON/kernels/NESobel3x3Kernel.h
new file mode 100644
index 0000000000..246dd83573
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NESobel3x3Kernel.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NESOBEL3x3KERNEL_H__
+#define __ARM_COMPUTE_NESOBEL3x3KERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the kernel to run a 3x3 Sobel X filter on a tensor.
+ *
+ * @f[
+ *      \mathbf{G}_x=\begin{vmatrix}
+ *      -1 & 0 & +1\\
+ *      -2 & 0 & +2\\
+ *      -1 & 0 & +1
+ *      \end{vmatrix}
+ * @f]
+ */
+class NESobel3x3Kernel : public INEKernel
+{
+public:
+    /** Default constructor */
+    NESobel3x3Kernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NESobel3x3Kernel(const NESobel3x3Kernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NESobel3x3Kernel &operator=(const NESobel3x3Kernel &) = delete;
+    /** Allow instances of this class to be moved */
+    NESobel3x3Kernel(NESobel3x3Kernel &&) = default;
+    /** Allow instances of this class to be moved */
+    NESobel3x3Kernel &operator=(NESobel3x3Kernel &&) = default;
+    /** Default destructor */
+    ~NESobel3x3Kernel() = default;
+
+    /** Initialise the kernel's source, destination and border mode.
+     *
+     * @note At least one of output_x or output_y must be set.
+     *
+     * @param[in]  input            Source tensor. Data type supported: U8.
+     * @param[out] output_x         (Optional) Destination tensor for the X gradient. Data type supported: S16.
+     * @param[out] output_y         (Optional) Destination tensor for the Y gradient. Data type supported: S16.
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+    BorderSize border_size() const override;
+
+private:
+    bool _run_sobel_x;     /**< Do we need to run Sobel X? */
+    bool _run_sobel_y;     /**< Do we need to run Sobel Y? */
+    const ITensor *_input; /**< Input tensor */
+    ITensor *_output_x;    /**< Output tensor for sobel X */
+    ITensor *_output_y;    /**< Output tensor for sobel Y */
+};
+}
+#endif /*__ARM_COMPUTE_NESOBEL3x3KERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NESobel5x5Kernel.h b/arm_compute/core/NEON/kernels/NESobel5x5Kernel.h
new file mode 100644
index 0000000000..49c1c41e6d
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NESobel5x5Kernel.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NESOBEL5x5KERNEL_H__
+#define __ARM_COMPUTE_NESOBEL5x5KERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the kernel to run the horizontal pass of the 5x5 Sobel filter on a tensor.
+ *
+ */
+class NESobel5x5HorKernel : public INEKernel
+{
+public:
+    /** Default constructor */
+    NESobel5x5HorKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NESobel5x5HorKernel(const NESobel5x5HorKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NESobel5x5HorKernel &operator=(const NESobel5x5HorKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    NESobel5x5HorKernel(NESobel5x5HorKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    NESobel5x5HorKernel &operator=(NESobel5x5HorKernel &&) = default;
+    /** Default destructor */
+    ~NESobel5x5HorKernel() = default;
+
+    /** Initialise the kernel's source, destination and border mode.
+     *
+     * @note At least one of output_x or output_y must be set.
+     *
+     * @param[in]  input            Source tensor. Data type supported: U8.
+     * @param[out] output_x         (Optional) Destination tensor for the X gradient. Data type supported: S16.
+     * @param[out] output_y         (Optional) Destination tensor for the Y gradient. Data type supported: S16.
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
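+     *
+     * A two-pass usage sketch (the intermediate and output tensors are
+     * illustrative, not part of this header):
+     * @code
+     * NESobel5x5HorKernel  hor;
+     * NESobel5x5VertKernel vert;
+     * hor.configure(&src, &tmp_x, &tmp_y, false);
+     * vert.configure(&tmp_x, &tmp_y, &grad_x, &grad_y, false);
+     * @endcode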
+     */
+    void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+    BorderSize border_size() const override;
+
+private:
+    const ITensor *_input;   /**< Input tensor */
+    ITensor *_output_x;      /**< X output of horizontal pass */
+    ITensor *_output_y;      /**< Y output of horizontal pass */
+    bool _run_sobel_x;       /**< Do we need to run Sobel X? */
+    bool _run_sobel_y;       /**< Do we need to run Sobel Y? */
+    BorderSize _border_size; /**< Border size */
+};
+
+/** Interface for the kernel to run the vertical pass of the 5x5 Sobel filter on a tensor.
+ *
+ */
+class NESobel5x5VertKernel : public INEKernel
+{
+public:
+    /** Default constructor */
+    NESobel5x5VertKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NESobel5x5VertKernel(const NESobel5x5VertKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NESobel5x5VertKernel &operator=(const NESobel5x5VertKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    NESobel5x5VertKernel(NESobel5x5VertKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    NESobel5x5VertKernel &operator=(NESobel5x5VertKernel &&) = default;
+    /** Default destructor */
+    ~NESobel5x5VertKernel() = default;
+
+    /** Initialise the kernel's source, destination and border mode.
+     *
+     * @param[in]  input_x          Input for X (X output of the horizontal pass). Data type supported: S16.
+     * @param[in]  input_y          Input for Y (Y output of the horizontal pass). Data type supported: S16.
+     * @param[out] output_x         Destination tensor for the X gradient. Data type supported: S16.
+     * @param[out] output_y         Destination tensor for the Y gradient. Data type supported: S16.
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(ITensor *input_x, ITensor *input_y, ITensor *output_x, ITensor *output_y, bool border_undefined);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+    BorderSize border_size() const override;
+
+private:
+    ITensor *_input_x;  /**< X input (X output of the hor pass) */
+    ITensor *_input_y;  /**< Y input (Y output of the hor pass) */
+    ITensor *_output_x; /**< X output of sobel */
+    ITensor *_output_y; /**< Y output of sobel */
+    bool _run_sobel_x;  /**< Do we need to run sobel X? */
+    bool _run_sobel_y;  /**< Do we need to run sobel Y? */
+};
+}
+#endif /*__ARM_COMPUTE_NESOBEL5x5KERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NESobel7x7Kernel.h b/arm_compute/core/NEON/kernels/NESobel7x7Kernel.h
new file mode 100644
index 0000000000..4bff8596b8
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NESobel7x7Kernel.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NESOBEL7x7KERNEL_H__
+#define __ARM_COMPUTE_NESOBEL7x7KERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the kernel to run the horizontal pass of the 7x7 Sobel filter on a tensor.
+ *
+ */
+class NESobel7x7HorKernel : public INEKernel
+{
+public:
+    /** Default constructor */
+    NESobel7x7HorKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NESobel7x7HorKernel(const NESobel7x7HorKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NESobel7x7HorKernel &operator=(const NESobel7x7HorKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    NESobel7x7HorKernel(NESobel7x7HorKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    NESobel7x7HorKernel &operator=(NESobel7x7HorKernel &&) = default;
+    /** Default destructor */
+    ~NESobel7x7HorKernel() = default;
+
+    /** Initialise the kernel's source, destination and border mode.
+     *
+     * @note At least one of output_x or output_y must be set.
+     *
+     * @param[in]  input            Source tensor. Data type supported: U8.
+     * @param[out] output_x         (Optional) Destination tensor for the X gradient. Data type supported: S32.
+     * @param[out] output_y         (Optional) Destination tensor for the Y gradient. Data type supported: S32.
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+    BorderSize border_size() const override;
+
+private:
+    const ITensor *_input;   /**< Input tensor */
+    ITensor *_output_x;      /**< X output of horizontal pass */
+    ITensor *_output_y;      /**< Y output of horizontal pass */
+    bool _run_sobel_x;       /**< Do we need to run Sobel X? */
+    bool _run_sobel_y;       /**< Do we need to run Sobel Y? */
+    BorderSize _border_size; /**< Border size */
+};
+
+/** Interface for the kernel to run the vertical pass of the 7x7 Sobel filter on a tensor.
+ *
+ */
+class NESobel7x7VertKernel : public INEKernel
+{
+public:
+    /** Default constructor */
+    NESobel7x7VertKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NESobel7x7VertKernel(const NESobel7x7VertKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NESobel7x7VertKernel &operator=(const NESobel7x7VertKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    NESobel7x7VertKernel(NESobel7x7VertKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    NESobel7x7VertKernel &operator=(NESobel7x7VertKernel &&) = default;
+    /** Default destructor */
+    ~NESobel7x7VertKernel() = default;
+
+    /** Initialise the kernel's source, destination and border mode.
+     *
+     * @note At least one of output_x or output_y must be set.
+     * @note If output_x is set then input_x must be set too.
+     * @note If output_y is set then input_y must be set too.
+     *
+     * @param[in]  input_x          (Optional) Input for X (X output of the horizontal pass). Data type supported: S32.
+     * @param[in]  input_y          (Optional) Input for Y (Y output of the horizontal pass). Data type supported: S32.
+     * @param[out] output_x         (Optional) Destination tensor for the X gradient. Data type supported: S32.
+     * @param[out] output_y         (Optional) Destination tensor for the Y gradient. Data type supported: S32.
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(const ITensor *input_x, const ITensor *input_y, ITensor *output_x, ITensor *output_y, bool border_undefined);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+    BorderSize border_size() const override;
+
+private:
+    const ITensor *_input_x; /**< X input (X output of the hor pass) */
+    const ITensor *_input_y; /**< Y input (Y output of the hor pass) */
+    ITensor *_output_x;      /**< X output of sobel */
+    ITensor *_output_y;      /**< Y output of sobel */
+    bool _run_sobel_x;       /**< Do we need to run sobel X? */
+    bool _run_sobel_y;       /**< Do we need to run sobel Y? */
+};
+}
+#endif /*__ARM_COMPUTE_NESOBEL7x7KERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h b/arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h
new file mode 100644
index 0000000000..ab626ad5ec
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NESOFTMAXLAYERKERNEL_H__
+#define __ARM_COMPUTE_NESOFTMAXLAYERKERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+#include "arm_compute/core/NEON/INESimpleKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for identifying the max value of 1D Logits */
+class NELogits1DMaxKernel : public INESimpleKernel
+{
+public:
+    /** Default constructor */
+    NELogits1DMaxKernel();
+    /** Set the input and output tensors.
+     *
+     * @param[in]  input  Source tensor. Data types supported: QS8, F32.
+     * @param[out] output Destination tensor.
Data types supported: same as @p input + */ + void configure(const ITensor *input, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + using Logits1DMaxFunction = void(const ITensor *in, ITensor *out, const Window &window); + +private: + Logits1DMaxFunction *_func; + BorderSize _border_size; +}; + +/** Interface for shifting the logits values around the max value and exponentiating the result */ +class NELogits1DShiftExpSumKernel : public INEKernel +{ +public: + /** Default constructor */ + NELogits1DShiftExpSumKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELogits1DShiftExpSumKernel(const NELogits1DShiftExpSumKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELogits1DShiftExpSumKernel &operator=(const NELogits1DShiftExpSumKernel &) = delete; + /** Allow instances of this class to be moved */ + NELogits1DShiftExpSumKernel(NELogits1DShiftExpSumKernel &&) = default; + /** Allow instances of this class to be moved */ + NELogits1DShiftExpSumKernel &operator=(NELogits1DShiftExpSumKernel &&) = default; + /** Default destructor */ + ~NELogits1DShiftExpSumKernel() = default; + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: QS8, F32. + * @param[in] max Max values tensor. Data types supported: same as @p input. + * @param[out] output Destination tensor. Data types supported: same as @p input. + * @param[out] sum Sum of 1D logits tensor. Data types supported: same as @p input. + */ + void configure(const ITensor *input, const ITensor *max, ITensor *output, ITensor *sum); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + using Logits1DShiftExpSumFunction = void(const ITensor *in, const ITensor *max, ITensor *out, ITensor *sum, const Window &window); + +private: + Logits1DShiftExpSumFunction *_func; + const ITensor *_input; + const ITensor *_max; + ITensor *_output; + ITensor *_sum; +}; + +/** Interface for calculating the final step of the Softmax Layer where each logit value is multiplied by the inverse of the sum of the logits. */ +class NELogits1DNormKernel : public INEKernel +{ +public: + /** Default constructor */ + NELogits1DNormKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELogits1DNormKernel(const NELogits1DNormKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELogits1DNormKernel &operator=(const NELogits1DNormKernel &) = delete; + /** Allow instances of this class to be moved */ + NELogits1DNormKernel(NELogits1DNormKernel &&) = default; + /** Allow instances of this class to be moved */ + NELogits1DNormKernel &operator=(NELogits1DNormKernel &&) = default; + /** Default destructor */ + ~NELogits1DNormKernel() = default; + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: QS8, F32. + * @param[in] sum Sum tensor. The number of dimensions should be dim(input)-1. Data types supported: same as @p input. + * @param[out] output Destination tensor. Data types supported: same as @p input. 
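+     *
+     * A sketch of the three-stage softmax pipeline built from these kernels
+     * (tensor allocation omitted; the variable names are illustrative only):
+     * @code
+     * NELogits1DMaxKernel         max_kernel;
+     * NELogits1DShiftExpSumKernel exp_sum_kernel;
+     * NELogits1DNormKernel        norm_kernel;
+     * max_kernel.configure(&in, &max_vals);
+     * exp_sum_kernel.configure(&in, &max_vals, &exp_vals, &sums);
+     * norm_kernel.configure(&exp_vals, &sums, &out);
+     * @endcode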
+     */
+    void configure(const ITensor *input, const ITensor *sum, ITensor *output);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+
+private:
+    using Logits1DNormFunction = void(const ITensor *in, const ITensor *sum, ITensor *out, const Window &window);
+
+private:
+    Logits1DNormFunction *_func;
+    const ITensor *_input;
+    const ITensor *_sum;
+    ITensor *_output;
+};
+}
+#endif /*__ARM_COMPUTE_NESOFTMAXLAYERKERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NETableLookupKernel.h b/arm_compute/core/NEON/kernels/NETableLookupKernel.h
new file mode 100644
index 0000000000..b3963e5a75
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NETableLookupKernel.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NETABLELOOKUPKERNEL_H__
+#define __ARM_COMPUTE_NETABLELOOKUPKERNEL_H__
+
+#include "arm_compute/core/NEON/INESimpleKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+class ILut;
+
+/** Interface for the kernel to perform table lookup calculations. */
+class NETableLookupKernel : public INESimpleKernel
+{
+public:
+    /** Default constructor */
+    NETableLookupKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NETableLookupKernel(const NETableLookupKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NETableLookupKernel &operator=(const NETableLookupKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    NETableLookupKernel(NETableLookupKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    NETableLookupKernel &operator=(NETableLookupKernel &&) = default;
+    /** Initialise the kernel's input, lut and output.
+     *
+     * @param[in]  input  An input tensor. Data types supported: U8/S16.
+     * @param[in]  lut    The input LUT.
+     * @param[out] output The output tensor. Data types supported: same as @p input
+     */
+    void configure(const ITensor *input, const ILut *lut, ITensor *output);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+
+private:
+    /** Perform table lookup on a given window.
+     *
+     * @param[in] window Region on which to execute the kernel.
+     */
+    template <class T>
+    void tableLookup(const Window &window);
+    /** Common signature for all the specialised lut functions
+     *
+     * @param[in] window Region on which to execute the kernel.
+     */
+    using TableLookupFunction = void (NETableLookupKernel::*)(const Window &window);
+    /** Sub function to use for the particular tensor types passed to configure() */
+    TableLookupFunction _func;
+    const ILut *_lut;
+};
+}
+#endif /* __ARM_COMPUTE_NETABLELOOKUPKERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NEThresholdKernel.h b/arm_compute/core/NEON/kernels/NEThresholdKernel.h
new file mode 100644
index 0000000000..778176293f
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NEThresholdKernel.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NETHRESHOLDKERNEL_H__
+#define __ARM_COMPUTE_NETHRESHOLDKERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+#include "arm_compute/core/Types.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the thresholding kernel
+ *
+ */
+class NEThresholdKernel : public INEKernel
+{
+public:
+    /** Constructor
+     *  Initialize all the pointers to nullptr and parameters to zero.
+     */
+    NEThresholdKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEThresholdKernel(const NEThresholdKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEThresholdKernel &operator=(const NEThresholdKernel &) = delete;
+    /** Initialise the kernel's input, output and threshold parameters.
+     *
+     * @param[in]  input       An input tensor. Data type supported: U8.
+     * @param[out] output      The output tensor. Data type supported: U8.
+     * @param[in]  threshold   Threshold. When the threshold type is RANGE, this is used as the lower threshold.
+     * @param[in]  false_value Value to set when the condition is not respected.
+     * @param[in]  true_value  Value to set when the condition is respected.
+     * @param[in]  type        Thresholding type. Either RANGE or BINARY.
+     * @param[in]  upper       Upper threshold. Only used when the thresholding type is RANGE.
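+     *
+     * A binary-threshold configuration sketch (tensor setup omitted; the
+     * variable names and constants are illustrative only):
+     * @code
+     * NEThresholdKernel kernel;
+     * // false_value = 0, true_value = 255; upper is ignored for BINARY:
+     * kernel.configure(&src, &dst, 127, 0, 255, ThresholdType::BINARY, 0);
+     * @endcode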
+ */ + void configure(const ITensor *input, ITensor *output, uint8_t threshold, uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + /** run binary thresholding on the given window */ + void run_binary(const Window &window); + /** run range thresholding on the given window */ + void run_range(const Window &window); + + void (NEThresholdKernel::*_func)(const Window &window); + + const ITensor *_input; /**< Input */ + ITensor *_output; /**< Output */ + uint8_t _threshold; + uint8_t _false_value; + uint8_t _true_value; + uint8_t _upper; +}; +} +#endif /*__ARM_COMPUTE_NETHRESHOLDKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NETransposeKernel.h b/arm_compute/core/NEON/kernels/NETransposeKernel.h new file mode 100644 index 0000000000..ac9449ff92 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NETransposeKernel.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NETRANSPOSEKERNEL_H__ +#define __ARM_COMPUTE_NETRANSPOSEKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel which transposes the elements of a matrix. + * + * [width, height, batch] -> [height, width, batch] + * + */ +class NETransposeKernel : public INEKernel +{ +public: + /** Default constructor */ + NETransposeKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NETransposeKernel(const NETransposeKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NETransposeKernel &operator=(const NETransposeKernel &) = delete; + /** Allow instances of this class to be moved */ + NETransposeKernel(NETransposeKernel &&) = default; + /** Allow instances of this class to be moved */ + NETransposeKernel &operator=(NETransposeKernel &&) = default; + /** Default destructor */ + ~NETransposeKernel() = default; + + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data types supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32 + * @param[out] output Output tensor. 
Data type supported: Same as @p input
+     */
+    void configure(const ITensor *input, ITensor *output);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+
+private:
+    /** Common signature for all the transpose functions
+     *
+     * @param[in]  input  An input tensor. Data types supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32
+     * @param[out] output The output tensor. Data type supported: same as @p input
+     * @param[in]  window Region on which to execute the kernel.
+     */
+    using TransposeFunction = void(const ITensor *input, ITensor *output, const Window &window);
+    /** Transpose function to use for the particular tensor types passed to configure() */
+    TransposeFunction *_func;
+    const ITensor *_input;
+    ITensor *_output;
+};
+}
+#endif /* __ARM_COMPUTE_NETRANSPOSEKERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NEWarpKernel.h b/arm_compute/core/NEON/kernels/NEWarpKernel.h
new file mode 100644
index 0000000000..10fed1d450
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NEWarpKernel.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEWARPKERNEL_H__
+#define __ARM_COMPUTE_NEWARPKERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+#include "arm_compute/core/Types.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Common interface for warp affine and warp perspective */
+class INEWarpKernel : public INEKernel
+{
+public:
+    /** Default constructor */
+    INEWarpKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    INEWarpKernel(const INEWarpKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    INEWarpKernel &operator=(const INEWarpKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    INEWarpKernel(INEWarpKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    INEWarpKernel &operator=(INEWarpKernel &&) = default;
+    /** Initialise the kernel's input, output and border mode.
+     *
+     * @param[in]  input  Source tensor. Data type supported: U8.
+     * @param[out] output Destination tensor. Data type supported: U8.
+     * @param[in]  matrix The perspective or affine matrix to use. Must be of type float, 2x3 for an affine matrix and 3x3 for a perspective one.
+     * @param[in]  border_mode           Strategy to use for borders.
+     * @param[in]  constant_border_value Constant value used for filling the border.
+     */
+    virtual void configure(const ITensor *input, ITensor *output, const float *matrix, BorderMode border_mode, uint8_t constant_border_value);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+
+protected:
+    /** function to perform warp affine or warp perspective on the given window when border mode == UNDEFINED
+     *
+     * @param[in] window Region on which to execute the kernel
+     */
+    virtual void warp_undefined(const Window &window) = 0;
+    /** function to perform warp affine or warp perspective on the given window when border mode == CONSTANT
+     *
+     * @param[in] window Region on which to execute the kernel
+     */
+    virtual void warp_constant(const Window &window) = 0;
+    /** function to perform warp affine or warp perspective on the given window when border mode == REPLICATE
+     *
+     * @param[in] window Region on which to execute the kernel
+     */
+    virtual void warp_replicate(const Window &window) = 0;
+    /** Common signature for all the specialised warp functions
+     *
+     * @param[in] window Region on which to execute the kernel.
+     */
+    void (INEWarpKernel::*_func)(const Window &window);
+
+    const ITensor *_input;          /**< Input Tensor */
+    ITensor *_output;               /**< Output Tensor */
+    uint8_t _constant_border_value; /**< Constant value used for filling the border. This value is used for those pixels out of the ROI when the border mode is CONSTANT */
+    const float *_matrix;           /**< The affine or perspective matrix. Must be of type float, 2x3 for warp affine and 3x3 for warp perspective. */
+};
+
+/** Template interface for the kernel to compute warp affine
+ *
+ */
+template <InterpolationPolicy interpolation>
+class NEWarpAffineKernel : public INEWarpKernel
+{
+private:
+    // Inherited methods overridden:
+    void warp_undefined(const Window &window) override;
+    void warp_constant(const Window &window) override;
+    void warp_replicate(const Window &window) override;
+};
+
+/** Template interface for the kernel to compute warp perspective
+ *
+ */
+template <InterpolationPolicy interpolation>
+class NEWarpPerspectiveKernel : public INEWarpKernel
+{
+private:
+    // Inherited methods overridden:
+    void warp_undefined(const Window &window) override;
+    void warp_constant(const Window &window) override;
+    void warp_replicate(const Window &window) override;
+};
+}
+#endif /*__ARM_COMPUTE_NEWARPKERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h b/arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h
new file mode 100644
index 0000000000..cad2d00b1f
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
diff --git a/arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h b/arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h
new file mode 100644
index 0000000000..cad2d00b1f
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEWEIGHTSRESHAPEKERNEL_H__
+#define __ARM_COMPUTE_NEWEIGHTSRESHAPEKERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** NEON kernel to perform reshaping on the weights used by convolution and locally connected layers
+ *
+ * Rearranges each 3-dimensional kernel to a single row, leading to a matrix with linearized kernels.
+ * In combination with the @ref NEIm2ColKernel it can be used to transform a convolution into a matrix multiplication.
+ *
+ * For example, assuming a 3D weight kernel of 3x3 dimensions and depth of 2 we have:
+ * @f[
+ * \left( \begin{array}{ccc}
+ * a000 & a001 & a002 \\
+ * a010 & a011 & a012 \\
+ * a020 & a021 & a022 \\
+ * \end{array} \right)
+ * \left( \begin{array}{ccc}
+ * a100 & a101 & a102 \\
+ * a110 & a111 & a112 \\
+ * a120 & a121 & a122 \\
+ * \end{array} \right)
+ * \rightarrow
+ * \left( \begin{array}{cccccccccccccccccc}
+ * a000 & a001 & a002 & a010 & a011 & a012 & a020 & a021 & a022 & a100 & a101 & a102 & a110 & a111 & a112 & a120 & a121 & a122 \\
+ * \end{array} \right)
+ * @f]
+ */
+class NEWeightsReshapeKernel : public INEKernel
+{
+public:
+    /** Constructor.*/
+    NEWeightsReshapeKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEWeightsReshapeKernel(const NEWeightsReshapeKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEWeightsReshapeKernel &operator=(const NEWeightsReshapeKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    NEWeightsReshapeKernel(NEWeightsReshapeKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    NEWeightsReshapeKernel &operator=(NEWeightsReshapeKernel &&) = default;
+    /** Default destructor */
+    ~NEWeightsReshapeKernel() = default;
+    /** Set the input and output of the kernel.
+     *
+     * @param[in]  input  The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared,
+     *                    and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. Data types supported: QS8/F32
+     * @param[in]  bias   The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with
+     *                    dimensions [OFM, num_patches] if unshared. Data types supported: Same as @p input
+     * @param[out] output The output tensor.
+     *                    Data types supported: Same as @p input
+     */
+    void configure(const ITensor *input, const ITensor *bias, ITensor *output);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+
+private:
+    /** Common signature for all the specialised weights reshape functions */
+    using WeightsReshapeKernel = void(const ITensor *input, const ITensor *bias, ITensor *output, const Window &window);
+
+    WeightsReshapeKernel *_func;
+    const ITensor        *_input;
+    const ITensor        *_bias;
+    ITensor              *_output;
+};
+}
+
+#endif /*__ARM_COMPUTE_NEWEIGHTSRESHAPEKERNEL_H__ */
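A sketch of the shape arithmetic described above (editorial addition, with hypothetical helper names): each OFM kernel of kernel_x * kernel_y * IFM elements becomes one row of the reshaped matrix, with one extra element per row when biases are appended.

    #include <cstddef>

    // Hypothetical helper: dimensions of the reshaped weights matrix,
    // one linearized kernel per row.
    struct ReshapedDims
    {
        std::size_t rows;
        std::size_t cols;
    };

    ReshapedDims reshaped_weights_dims(std::size_t kernel_x, std::size_t kernel_y,
                                       std::size_t ifm, std::size_t ofm, bool has_bias)
    {
        return { ofm, kernel_x * kernel_y * ifm + (has_bias ? 1u : 0u) };
    }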
diff --git a/arm_compute/core/PixelValue.h b/arm_compute/core/PixelValue.h
new file mode 100644
index 0000000000..b4912ce15a
--- /dev/null
+++ b/arm_compute/core/PixelValue.h
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_PIXELVALUE_H__
+#define __ARM_COMPUTE_PIXELVALUE_H__
+
+#include <cstdint>
+
+namespace arm_compute
+{
+/** Class describing the value of a pixel for any image format. */
+class PixelValue
+{
+public:
+    /** Default constructor: value initialized to 0 */
+    PixelValue()
+        : value{ { 0 } }
+    {
+    }
+    /** Initialize the union with a U8 pixel value
+     *
+     * @param[in] v U8 value.
+     */
+    PixelValue(uint8_t v)
+        : PixelValue()
+    {
+        value.u8 = v;
+    }
+    /** Initialize the union with a U16 pixel value
+     *
+     * @param[in] v U16 value.
+     */
+    PixelValue(uint16_t v)
+        : PixelValue()
+    {
+        value.u16 = v;
+    }
+    /** Initialize the union with a S16 pixel value
+     *
+     * @param[in] v S16 value.
+     */
+    PixelValue(int16_t v)
+        : PixelValue()
+    {
+        value.s16 = v;
+    }
+    /** Initialize the union with a U32 pixel value
+     *
+     * @param[in] v U32 value.
+     */
+    PixelValue(uint32_t v)
+        : PixelValue()
+    {
+        value.u32 = v;
+    }
+    /** Initialize the union with a S32 pixel value
+     *
+     * @param[in] v S32 value.
+     */
+    PixelValue(int32_t v)
+        : PixelValue()
+    {
+        value.s32 = v;
+    }
+    /** Initialize the union with a F32 pixel value
+     *
+     * @param[in] v F32 value.
+     */
+    PixelValue(float v)
+        : PixelValue()
+    {
+        value.f32 = v;
+    }
+    /** Union which describes the value of a pixel for any image format.
+     * Use the field corresponding to the image format.
+     */
+    union
+    {
+        uint8_t  rgb[3];  /**< 3 channels: RGB888 */
+        uint8_t  yuv[3];  /**< 3 channels: any YUV format */
+        uint8_t  rgbx[4]; /**< 4 channels: RGBX8888 */
+        float    f32;     /**< Single channel float 32 */
+        uint8_t  u8;      /**< Single channel U8 */
+        int8_t   s8;      /**< Single channel S8 */
+        uint16_t u16;     /**< Single channel U16 */
+        int16_t  s16;     /**< Single channel S16 */
+        uint32_t u32;     /**< Single channel U32 */
+        int32_t  s32;     /**< Single channel S32 */
+    } value;
+    /** Interpret the pixel value as a U8
+     *
+     * @param[out] v Returned value
+     */
+    void get(uint8_t &v) const
+    {
+        v = value.u8;
+    }
+    /** Interpret the pixel value as a S8
+     *
+     * @param[out] v Returned value
+     */
+    void get(int8_t &v) const
+    {
+        v = value.s8;
+    }
+    /** Interpret the pixel value as a U16
+     *
+     * @param[out] v Returned value
+     */
+    void get(uint16_t &v) const
+    {
+        v = value.u16;
+    }
+    /** Interpret the pixel value as a S16
+     *
+     * @param[out] v Returned value
+     */
+    void get(int16_t &v) const
+    {
+        v = value.s16;
+    }
+    /** Interpret the pixel value as a U32
+     *
+     * @param[out] v Returned value
+     */
+    void get(uint32_t &v) const
+    {
+        v = value.u32;
+    }
+    /** Interpret the pixel value as a S32
+     *
+     * @param[out] v Returned value
+     */
+    void get(int32_t &v) const
+    {
+        v = value.s32;
+    }
+    /** Interpret the pixel value as a F32
+     *
+     * @param[out] v Returned value
+     */
+    void get(float &v) const
+    {
+        v = value.f32;
+    }
+};
+}
+#endif /* __ARM_COMPUTE_PIXELVALUE_H__ */
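Minimal usage sketch for PixelValue (editorial addition, not part of the header above): each constructor fills the union field matching the C++ type of its argument, and get() reads it back through an out-parameter of the same type.

    #include <cstdint>
    #include "arm_compute/core/PixelValue.h"

    void pixel_value_example()
    {
        arm_compute::PixelValue border(static_cast<uint8_t>(255)); // selects the U8 field
        uint8_t v = 0;
        border.get(v); // v == 255
    }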
diff --git a/arm_compute/core/PyramidInfo.h b/arm_compute/core/PyramidInfo.h
new file mode 100644
index 0000000000..76b3852bbf
--- /dev/null
+++ b/arm_compute/core/PyramidInfo.h
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_PYRAMIDINFO_H__
+#define __ARM_COMPUTE_PYRAMIDINFO_H__
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+
+#include <cstddef>
+
+namespace arm_compute
+{
+/** Store the Pyramid's metadata */
+class PyramidInfo
+{
+public:
+    /** Default constructor */
+    PyramidInfo();
+    /** Default destructor */
+    virtual ~PyramidInfo() = default;
+    /** Allow instances of this class to be copy constructed */
+    PyramidInfo(const PyramidInfo &) = default;
+    /** Allow instances of this class to be copied */
+    PyramidInfo &operator=(const PyramidInfo &) = default;
+    /** Allow instances of this class to be move constructed */
+    PyramidInfo(PyramidInfo &&) = default;
+    /** Allow instances of this class to be moved */
+    PyramidInfo &operator=(PyramidInfo &&) = default;
+
+    /** Create pyramid info for 2D tensors
+     *
+     * @param[in] num_levels The number of pyramid levels. This is required to be a non-zero value
+     * @param[in] scale      Used to indicate the scale between the pyramid levels.
+     *                       This is required to be a non-zero positive value.
+     * @param[in] width      The width of the 2D tensor at 0th pyramid level
+     * @param[in] height     The height of the 2D tensor at 0th pyramid level
+     * @param[in] format     The format of all 2D tensors in the pyramid.
+     *                       NV12, NV21, IYUV, UYVY and YUYV formats are not supported.
+     */
+    PyramidInfo(size_t num_levels, float scale, size_t width, size_t height, Format format);
+
+    /** Create pyramid info using TensorShape
+     *
+     * @param[in] num_levels   The number of pyramid levels. This is required to be a non-zero value
+     * @param[in] scale        Used to indicate the scale between the pyramid levels.
+     *                         This is required to be a non-zero positive value.
+     * @param[in] tensor_shape It specifies the size for each dimension of the 0th pyramid level tensor, in number of elements
+     * @param[in] format       The format of all tensors in the pyramid
+     */
+    PyramidInfo(size_t num_levels, float scale, const TensorShape &tensor_shape, Format format);
+
+    /** Initialize pyramid's metadata for 2D tensors
+     *
+     * @param[in] num_levels The number of pyramid levels. This is required to be a non-zero value
+     * @param[in] scale      Used to indicate the scale between the pyramid levels.
+     *                       This is required to be a non-zero positive value.
+     * @param[in] width      The width of the 2D tensor at 0th pyramid level
+     * @param[in] height     The height of the 2D tensor at 0th pyramid level
+     * @param[in] format     The format of all 2D tensors in the pyramid.
+     *                       NV12, NV21, IYUV, UYVY and YUYV formats are not supported.
+     */
+    void init(size_t num_levels, float scale, size_t width, size_t height, Format format);
+    /** Initialize pyramid's metadata using TensorShape
+     *
+     * @param[in] num_levels   The number of pyramid levels. This is required to be a non-zero value
+     * @param[in] scale        Used to indicate the scale between the pyramid levels.
+     *                         This is required to be a non-zero positive value.
+     * @param[in] tensor_shape It specifies the size for each dimension of the 0th pyramid level tensor, in number of elements
+     * @param[in] format       The format of all tensors in the pyramid
+     */
+    void init(size_t num_levels, float scale, const TensorShape &tensor_shape, Format format);
+    /** Return the number of the pyramid levels
+     *
+     * @return The number of the pyramid levels
+     */
+    size_t num_levels() const;
+    /** Return the width of the 0th level tensor
+     *
+     * @return The width of the 0th level tensor
+     */
+    size_t width() const;
+    /** Return the height of the 0th level tensor
+     *
+     * @return The height of the 0th level tensor
+     */
+    size_t height() const;
+    /** Return the TensorShape of the 0th level tensor
+     *
+     * @return The TensorShape of the 0th level tensor
+     */
+    const TensorShape &tensor_shape() const;
+    /** Return the image format of all tensors in the pyramid
+     *
+     * @return The image format
+     */
+    Format format() const;
+    /** Return the scale factor of the pyramid
+     *
+     * @return The scale factor
+     */
+    float scale() const;
+
+private:
+    size_t      _num_levels;
+    TensorShape _tensor_shape;
+    Format      _format;
+    float       _scale;
+};
+}
+#endif /*__ARM_COMPUTE_PYRAMIDINFO_H__ */
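Illustrative sketch for PyramidInfo (editorial addition): a 4-level half-scale U8 pyramid starting at 640x480. Level i is roughly scale^i times the level-0 size, i.e. 640x480, 320x240, 160x120, 80x60 here; the exact rounding of intermediate levels is left to the library.

    #include "arm_compute/core/PyramidInfo.h"
    #include "arm_compute/core/Types.h"

    void make_pyramid_info(arm_compute::PyramidInfo &info)
    {
        info.init(4 /* num_levels */, arm_compute::SCALE_PYRAMID_HALF,
                  640 /* width */, 480 /* height */, arm_compute::Format::U8);
    }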
Initializes "width" and "height" with the dimensions of "size" + * + * @param[in] size Size data object + */ + Size2D(const Size2D &size) + : width(size.width), height(size.height) + { + } + /** Copy assignment + * + * @param[in] size Constant reference input "Size2D" data object to copy + * + * @return Reference to the newly altered left hand side "Size2D" data object + */ + Size2D &operator=(const Size2D &size) + { + width = size.width; + height = size.height; + return *this; + } + /** The area of the image or rectangle calculated as (width * height) + * + * @return Area (width * height) + * + */ + size_t area() const + { + return (width * height); + } + +public: + size_t width; /**< Width of the image region or rectangle */ + size_t height; /**< Height of the image region or rectangle */ +}; +} +#endif /*__ARM_COMPUTE_SIZE2D_H__ */ diff --git a/arm_compute/core/Steps.h b/arm_compute/core/Steps.h new file mode 100644 index 0000000000..33a88a2568 --- /dev/null +++ b/arm_compute/core/Steps.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_STEPS_H__ +#define __ARM_COMPUTE_STEPS_H__ + +#include "arm_compute/core/Dimensions.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Types.h" + +#include +#include +#include + +namespace arm_compute +{ +/** Class to describe a number of elements in each dimension. Similar to @ref + * Strides but not in bytes but number of elements. + */ +class Steps : public Dimensions +{ +public: + /** Constructor to initialize the steps. + * + * @param[in] steps Values to initialize the steps. + */ + template + Steps(Ts... steps) + : Dimensions{ steps... 
diff --git a/arm_compute/core/Steps.h b/arm_compute/core/Steps.h
new file mode 100644
index 0000000000..33a88a2568
--- /dev/null
+++ b/arm_compute/core/Steps.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_STEPS_H__
+#define __ARM_COMPUTE_STEPS_H__
+
+#include "arm_compute/core/Dimensions.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Types.h"
+
+#include <algorithm>
+#include <array>
+#include <cstddef>
+
+namespace arm_compute
+{
+/** Class to describe a number of elements in each dimension. Similar to @ref Strides,
+ * but expressed in number of elements rather than in bytes.
+ */
+class Steps : public Dimensions<unsigned int>
+{
+public:
+    /** Constructor to initialize the steps.
+     *
+     * @param[in] steps Values to initialize the steps.
+     */
+    template <typename... Ts>
+    Steps(Ts... steps)
+        : Dimensions{ steps... }
+    {
+        // Initialize empty dimensions to 1
+        std::fill(_id.begin() + _num_dimensions, _id.end(), 1);
+    }
+    /** Allow instances of this class to be copy constructed */
+    constexpr Steps(const Steps &) = default;
+    /** Allow instances of this class to be copied */
+    Steps &operator=(const Steps &) = default;
+    /** Allow instances of this class to be move constructed */
+    constexpr Steps(Steps &&) = default;
+    /** Allow instances of this class to be moved */
+    Steps &operator=(Steps &&) = default;
+    /** Default destructor */
+    ~Steps() = default;
+};
+}
+#endif /*__ARM_COMPUTE_STEPS_H__*/
diff --git a/arm_compute/core/Strides.h b/arm_compute/core/Strides.h
new file mode 100644
index 0000000000..329fafb5f8
--- /dev/null
+++ b/arm_compute/core/Strides.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_STRIDES_H__
+#define __ARM_COMPUTE_STRIDES_H__
+
+#include "arm_compute/core/Dimensions.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Types.h"
+
+#include <algorithm>
+#include <array>
+#include <cstdint>
+
+namespace arm_compute
+{
+/** Strides of an item in bytes */
+class Strides : public Dimensions<uint32_t>
+{
+public:
+    /** Constructor to initialize the strides.
+     *
+     * @param[in] strides Values to initialize the strides.
+     */
+    template <typename... Ts>
+    constexpr Strides(Ts... strides)
+        : Dimensions{ strides... }
+    {
+    }
+    /** Allow instances of this class to be copy constructed */
+    constexpr Strides(const Strides &) = default;
+    /** Allow instances of this class to be copied */
+    Strides &operator=(const Strides &) = default;
+    /** Allow instances of this class to be move constructed */
+    constexpr Strides(Strides &&) = default;
+    /** Allow instances of this class to be moved */
+    Strides &operator=(Strides &&) = default;
+    /** Default destructor */
+    ~Strides() = default;
+};
+}
+#endif /*__ARM_COMPUTE_STRIDES_H__*/
diff --git a/arm_compute/core/SubTensorInfo.h b/arm_compute/core/SubTensorInfo.h
new file mode 100644
index 0000000000..e2532fd487
--- /dev/null
+++ b/arm_compute/core/SubTensorInfo.h
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_SUBTENSORINFO_H__
+#define __ARM_COMPUTE_SUBTENSORINFO_H__
+
+#include "arm_compute/core/ITensorInfo.h"
+
+#include "arm_compute/core/Coordinates.h"
+#include "arm_compute/core/Strides.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Validate.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+/** Store the sub tensor's metadata */
+class SubTensorInfo final : public ITensorInfo
+{
+public:
+    /** Default constructor */
+    SubTensorInfo();
+    /** Constructor
+     *
+     * @param[in] parent       Metadata of parent tensor.
+     * @param[in] tensor_shape Tensor shape. Shape must fit inside parent's shape.
+     *                         X and Y dimensions must match the parent's ones.
+     * @param[in] coords       Coordinates of starting element inside parent tensor.
+ */ + SubTensorInfo(ITensorInfo *parent, const TensorShape &tensor_shape, const Coordinates &coords); + /** Default destructor */ + ~SubTensorInfo() = default; + /** Allow instances of this class to be copy constructed */ + SubTensorInfo(const SubTensorInfo &) = default; + /** Allow instances of this class to be copied */ + SubTensorInfo &operator=(const SubTensorInfo &) = default; + /** Allow instances of this class to be move constructed */ + SubTensorInfo(SubTensorInfo &&) = default; + /** Allow instances of this class to be moved */ + SubTensorInfo &operator=(SubTensorInfo &&) = default; + + // Inherited methods overridden: + void set_data_type(DataType data_type) override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + _parent->set_data_type(data_type); + }; + void set_num_channels(int num_channels) override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + _parent->set_num_channels(num_channels); + }; + void set_format(Format format) override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + _parent->set_format(format); + }; + void set_fixed_point_position(int fixed_point_position) override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + _parent->set_fixed_point_position(fixed_point_position); + }; + void set_tensor_shape(TensorShape shape) override; + bool auto_padding() override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + return _parent->auto_padding(); + }; + bool extend_padding(const PaddingSize &padding) override; + size_t dimension(size_t index) const override + { + return _tensor_shape[index]; + } + const Strides &strides_in_bytes() const override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + return _parent->strides_in_bytes(); + } + size_t offset_first_element_in_bytes() const override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + return _parent->offset_element_in_bytes(_coords); + } + size_t offset_element_in_bytes(const Coordinates &pos) const override; + int fixed_point_position() const override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + return _parent->fixed_point_position(); + } + size_t element_size() const override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + return _parent->element_size(); + } + size_t num_dimensions() const override + { + return _tensor_shape.num_dimensions(); + } + size_t num_channels() const override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + return _parent->num_channels(); + } + const TensorShape &tensor_shape() const override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + return _tensor_shape; + } + DataType data_type() const override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + return _parent->data_type(); + } + Format format() const override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + return _parent->format(); + } + size_t total_size() const override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + return _parent->total_size(); + } + PaddingSize padding() const override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + return _parent->padding(); + } + bool has_padding() const override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + return _parent->has_padding(); + } + bool is_resizable() const override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + return _parent->is_resizable(); + } + void set_is_resizable(bool is_resizable) override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + _parent->set_is_resizable(is_resizable); + } + ValidRegion valid_region() const override + { + return _valid_region; + } + void set_valid_region(ValidRegion valid_region) override + { + 
+        ARM_COMPUTE_ERROR_ON(_parent == nullptr);
+        ARM_COMPUTE_ERROR_ON_INVALID_SUBTENSOR_VALID_REGION(_parent->valid_region(), valid_region);
+        _valid_region = std::move(valid_region);
+    }
+
+private:
+    ITensorInfo *_parent;
+    TensorShape  _tensor_shape;
+    Coordinates  _coords;
+    ValidRegion  _valid_region;
+};
+}
+#endif /*__ARM_COMPUTE_SUBTENSORINFO_H__ */
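Illustrative sketch for SubTensorInfo (editorial addition): a 64x64 window into a 128x128 F32 parent, anchored at (32, 32). Most metadata queries (data type, strides, element size, ...) are forwarded to the parent, as the overrides above show; only the shape and start coordinates differ.

    #include "arm_compute/core/SubTensorInfo.h"
    #include "arm_compute/core/TensorInfo.h"

    using namespace arm_compute;

    void make_subtensor_info(TensorInfo &parent)
    {
        parent.init(TensorShape(128U, 128U), 1 /* num_channels */, DataType::F32);
        SubTensorInfo sub(&parent, TensorShape(64U, 64U), Coordinates(32, 32));
        // sub.data_type() == parent.data_type(); sub.dimension(0) == 64
    }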
diff --git a/arm_compute/core/TensorInfo.h b/arm_compute/core/TensorInfo.h
new file mode 100644
index 0000000000..35b9ccb9ff
--- /dev/null
+++ b/arm_compute/core/TensorInfo.h
@@ -0,0 +1,300 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TENSORINFO_H__
+#define __ARM_COMPUTE_TENSORINFO_H__
+
+#include "arm_compute/core/ITensorInfo.h"
+
+#include "arm_compute/core/Coordinates.h"
+#include "arm_compute/core/Strides.h"
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Utils.h"
+
+#include <cstddef>
+
+namespace arm_compute
+{
+class HOGInfo;
+
+/** Store the tensor's metadata */
+class TensorInfo final : public ITensorInfo
+{
+public:
+    /** Default constructor */
+    TensorInfo();
+    /** Default destructor */
+    ~TensorInfo() = default;
+    /** Construct a tensor info from another tensor info */
+    TensorInfo(const ITensorInfo &info);
+    /** Allow instances of this class to be copy constructed */
+    TensorInfo(const TensorInfo &) = default;
+    /** Allow instances of this class to be copied */
+    TensorInfo &operator=(const TensorInfo &) = default;
+    /** Allow instances of this class to be move constructed */
+    TensorInfo(TensorInfo &&) = default;
+    /** Allow instances of this class to be moved */
+    TensorInfo &operator=(TensorInfo &&) = default;
+
+    /** Construct a tensor info with a format.
+     *
+     * Can be used for automatic derivation of the shape by the function.
+     *
+     * @param[in] format Format of the tensor.
+     */
+    TensorInfo(Format format);
+
+    /** 2D tensor constructor
+     *
+     * @param[in] width  Width of the 2D tensor
+     * @param[in] height Height of the 2D tensor
+     * @param[in] format Single plane format of the tensor.
+     */
+    TensorInfo(unsigned int width, unsigned int height, Format format);
+    /** Constructor
+     *
+     * @param[in] tensor_shape It specifies the size for each dimension of the tensor in number of elements.
+     * @param[in] format       Single plane format of the tensor.
+     */
+    TensorInfo(const TensorShape &tensor_shape, Format format);
+
+    /** Construct a tensor info with a data type and number of channels.
+     *
+     * Can be used for automatic derivation of the shape by the function.
+     *
+     * @param[in] num_channels         It indicates the number of channels for each tensor element
+     * @param[in] data_type            Data type to use for each tensor element
+     * @param[in] fixed_point_position (Optional) It specifies the fixed point position when the tensor data type is QS8, QS16 or QS32.
+     */
+    TensorInfo(size_t num_channels, DataType data_type, size_t fixed_point_position = 0);
+
+    /** Constructor
+     *
+     * @param[in] tensor_shape         It specifies the size for each dimension of the tensor in number of elements.
+     * @param[in] num_channels         It indicates the number of channels for each tensor element
+     * @param[in] data_type            Data type to use for each tensor element
+     * @param[in] fixed_point_position (Optional) Fixed point position that expresses the number of bits for the fractional part of the number when the tensor's data type is QS8 or QS16.
+     */
+    TensorInfo(const TensorShape &tensor_shape, size_t num_channels, DataType data_type, int fixed_point_position = 0);
+    /** Constructor
+     *
+     * @param[in] hog_info HOG's metadata used to allocate normalized HOG space
+     * @param[in] width    Width of the 2D tensor where the HOG descriptor will be computed on
+     * @param[in] height   Height of the 2D tensor where the HOG descriptor will be computed on
+     */
+    TensorInfo(const HOGInfo &hog_info, unsigned int width, unsigned int height);
+
+    /** Initialize the tensor info with just a format.
+     *
+     * Can be used for automatic derivation of the shape by the function.
+     *
+     * @param[in] format Single plane format of the tensor.
+     */
+    void init(Format format);
+
+    /** Initialize the metadata structure with the given parameters
+     *
+     * @param[in] tensor_shape Size for each dimension of the tensor in number of elements.
+     * @param[in] format       Single plane format of the tensor.
+     */
+    void init(const TensorShape &tensor_shape, Format format);
+    /** Initialize the metadata structure with the given parameters
+     *
+     * @param[in] tensor_shape                  Size for each dimension of the tensor in number of elements.
+     * @param[in] format                        Single plane format of the tensor.
+     * @param[in] strides_in_bytes              Stride in bytes for accessing each dimension of the tensor.
+     * @param[in] offset_first_element_in_bytes Offset in bytes from the beginning of memory allocation to access the first element.
+     * @param[in] total_size_in_bytes           Size in bytes of the memory allocation (including the offset to the first element).
+     */
+    void init(const TensorShape &tensor_shape, Format format, const Strides &strides_in_bytes, size_t offset_first_element_in_bytes, size_t total_size_in_bytes);
+
+    /** Initialize the tensor info with a data type and number of channels.
+     *
+     * Can be used for automatic derivation of the shape by the function.
+     *
+     * @param[in] num_channels         Desired number of channels for each tensor element.
+     * @param[in] data_type            Data type to use for each tensor element.
+     * @param[in] fixed_point_position (Optional) Fixed point position when the tensor data type is QS8, QS16 or QS32.
+     */
+    void init(size_t num_channels, DataType data_type, size_t fixed_point_position = 0);
+
+    /** Initialize the metadata structure with the given parameters
+     *
+     * @param[in] tensor_shape Size for each dimension of the tensor in number of elements.
+     * @param[in] num_channels Desired number of channels for each tensor element.
+ * @param[in] data_type Data type to use for each tensor element. + * @param[in] fixed_point_position (Optional) Fixed point position that expresses the number of bits for the fractional part of the number when the tensor's data type is QS8 or QS16. + */ + void init(const TensorShape &tensor_shape, size_t num_channels, DataType data_type, int fixed_point_position = 0); + /** Initialize the metadata structure with the given parameters + * + * @param[in] tensor_shape Size for each dimension of the tensor in number of elements. + * @param[in] num_channels Desired number of channels for each tensor element. + * @param[in] data_type Data type to use for each tensor element. + * @param[in] strides_in_bytes Stride in bytes for accessing each dimension of the tensor. + * @param[in] offset_first_element_in_bytes Offset in bytes from the beginning of memory allocation to access the first element. + * @param[in] total_size_in_bytes Size in bytes of the memory allocation (including the offset to the first element). + * @param[in] fixed_point_position (Optional) Fixed point position that expresses the number of bits for the fractional part of the number when the tensor's data type is QS8 or QS16. + */ + void init(const TensorShape &tensor_shape, size_t num_channels, DataType data_type, const Strides &strides_in_bytes, size_t offset_first_element_in_bytes, + size_t total_size_in_bytes, int fixed_point_position = 0); + /** Initialize the metadata structure for the given HOG's metadata + * + * @param[in] hog_info HOG's metadata used to allocate normalized HOG space + * @param[in] width Width of the 2D tensor where the HOG descriptor will be computed on + * @param[in] height Height of the 2D tensor where the HOG descriptor will be computed on + */ + void init(const HOGInfo &hog_info, unsigned int width, unsigned int height); + /** Initialize the metadata structure for the given tensor shape and single-plane format, (Padding is automatically calculated) + * + * @note The padding used by this method is really conservative so that the tensor can be used for most functions. + * + * @param[in] tensor_shape It specifies the size for each dimension of the tensor in number of elements + * @param[in] format Single plane format of the image. + * + * @return Total allocation size including padding in bytes. + */ + size_t init_auto_padding(const TensorShape &tensor_shape, Format format); + /** Initialize the metadata structure for the given tensor shape, number of channels, + * data type and fixed point position. (Padding is automatically calculated) + * + * @note The padding used by this method is really conservative so that the tensor can be used for most functions. + * + * @param[in] tensor_shape It specifies the size for each dimension of the tensor in number of elements + * @param[in] num_channels It indicates the number of channels for each tensor element + * @param[in] data_type Data type to use for each tensor element + * @param[in] fixed_point_position (Optional) Fixed point position that expresses the number of bits for the fractional part of the number when the tensor's data type is QS8 or QS16. + * + * @return Total allocation size including padding in bytes. + */ + size_t init_auto_padding(const TensorShape &tensor_shape, size_t num_channels, DataType data_type, int fixed_point_position = 0); + /** Initialize the metadata structure for the given HOG's metadata + * + * @note init_auto_padding will be used for the tensor initialization. 
+     *
+     * @param[in] hog_info HOG's metadata used to allocate normalized HOG space
+     * @param[in] width    Width of the 2D tensor where the HOG descriptor will be computed on
+     * @param[in] height   Height of the 2D tensor where the HOG descriptor will be computed on
+     */
+    size_t init_auto_padding(const HOGInfo &hog_info, unsigned int width, unsigned int height);
+
+    // Inherited methods overridden:
+    void set_data_type(DataType data_type) override;
+    void set_num_channels(int num_channels) override;
+    void set_format(Format format) override;
+    void set_tensor_shape(TensorShape shape) override;
+    void set_fixed_point_position(int fixed_point_position) override;
+    bool auto_padding() override;
+    bool extend_padding(const PaddingSize &padding) override;
+    size_t dimension(size_t index) const override
+    {
+        return _tensor_shape[index];
+    }
+    const Strides &strides_in_bytes() const override
+    {
+        return _strides_in_bytes;
+    }
+    size_t offset_first_element_in_bytes() const override
+    {
+        return _offset_first_element_in_bytes;
+    }
+    size_t offset_element_in_bytes(const Coordinates &pos) const override;
+    int fixed_point_position() const override
+    {
+        return _fixed_point_position;
+    }
+    size_t element_size() const override
+    {
+        return data_size_from_type(_data_type) * _num_channels;
+    }
+    size_t num_dimensions() const override
+    {
+        return _tensor_shape.num_dimensions();
+    }
+    size_t num_channels() const override
+    {
+        return _num_channels;
+    }
+    const TensorShape &tensor_shape() const override
+    {
+        return _tensor_shape;
+    }
+    DataType data_type() const override
+    {
+        return _data_type;
+    }
+    Format format() const override
+    {
+        return _format;
+    }
+    size_t total_size() const override
+    {
+        return _total_size;
+    }
+    PaddingSize padding() const override
+    {
+        return _padding;
+    }
+    bool has_padding() const override
+    {
+        return !_padding.empty();
+    }
+    bool is_resizable() const override
+    {
+        return _is_resizable;
+    }
+    void set_is_resizable(bool is_resizable) override
+    {
+        _is_resizable = is_resizable;
+    }
+    ValidRegion valid_region() const override
+    {
+        return _valid_region;
+    }
+    void set_valid_region(ValidRegion valid_region) override
+    {
+        _valid_region = std::move(valid_region);
+    }
+
+private:
+    /** Calculates strides, offset and total size resulting from the specified padding around the XY plane.
+     *
+     * @param[in] padding Padding around the XY plane in elements.
+     */
+    std::tuple<Strides, size_t, size_t> calculate_padding_requirements(const PaddingSize &padding);
+
+    size_t      _total_size;
+    int         _fixed_point_position;
+    size_t      _offset_first_element_in_bytes;
+    Strides     _strides_in_bytes;
+    size_t      _num_channels;
+    TensorShape _tensor_shape;
+    DataType    _data_type;
+    Format      _format;
+    bool        _is_resizable;
+    ValidRegion _valid_region;
+    PaddingSize _padding;
+};
+}
+#endif /*__ARM_COMPUTE_TENSORINFO_H__ */
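Illustrative sketch for TensorInfo (editorial addition): metadata for a 224x224 RGB image tensor. With no padding the row stride is width times element_size(), and total_size() covers the whole allocation; extend_padding() would grow the strides accordingly.

    #include "arm_compute/core/TensorInfo.h"

    using namespace arm_compute;

    TensorInfo make_image_info()
    {
        TensorInfo info(TensorShape(224U, 224U), Format::RGB888);
        // info.element_size() == 3 bytes, info.dimension(0) == 224
        return info;
    }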
diff --git a/arm_compute/core/TensorShape.h b/arm_compute/core/TensorShape.h
new file mode 100644
index 0000000000..f8b3181686
--- /dev/null
+++ b/arm_compute/core/TensorShape.h
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TENSORSHAPE_H__
+#define __ARM_COMPUTE_TENSORSHAPE_H__
+
+#include "arm_compute/core/Dimensions.h"
+#include "arm_compute/core/Error.h"
+
+#include <algorithm>
+#include <array>
+#include <functional>
+#include <numeric>
+
+namespace arm_compute
+{
+/** Shape of a tensor */
+class TensorShape : public Dimensions<size_t>
+{
+public:
+    /** Constructor to initialize the tensor shape.
+     *
+     * @param[in] dims Values to initialize the dimensions.
+     */
+    template <typename... Ts>
+    TensorShape(Ts... dims)
+        : Dimensions{ dims... }
+    {
+        // Initialize unspecified dimensions to 1
+        if(_num_dimensions > 0)
+        {
+            std::fill(_id.begin() + _num_dimensions, _id.end(), 1);
+        }
+
+        // Correct the number of dimensions to ignore trailing dimensions of size 1
+        apply_dimension_correction();
+    }
+    /** Allow instances of this class to be copy constructed */
+    TensorShape(const TensorShape &) = default;
+    /** Allow instances of this class to be copied */
+    TensorShape &operator=(const TensorShape &) = default;
+    /** Allow instances of this class to be move constructed */
+    TensorShape(TensorShape &&) = default;
+    /** Allow instances of this class to be moved */
+    TensorShape &operator=(TensorShape &&) = default;
+    /** Default destructor */
+    ~TensorShape() = default;
+
+    /** Accessor to set the value of one of the dimensions.
+     *
+     * @param[in] dimension Dimension for which the value is set.
+     * @param[in] value     Value to be set for the dimension.
+     */
+    void set(size_t dimension, size_t value)
+    {
+        ARM_COMPUTE_ERROR_ON(value < 1);
+
+        // Make sure all empty dimensions are filled with 1
+        std::fill(_id.begin() + _num_dimensions, _id.end(), 1);
+
+        // Set the specified dimension and increase the number of dimensions if
+        // necessary
+        Dimensions::set(dimension, value);
+
+        // Correct the number of dimensions to ignore trailing dimensions of size 1
+        apply_dimension_correction();
+    }
+
+    /** Collapse the first n dimensions.
+     *
+     * @param[in] first Dimension into which the following @p n are collapsed.
+     * @param[in] n     Number of dimensions to collapse into @p first.
+     */
+    void collapse(size_t n, size_t first = 0)
+    {
+        Dimensions::collapse(n, first);
+
+        // Make sure all empty dimensions are filled with 1
+        std::fill(_id.begin() + _num_dimensions, _id.end(), 1);
+    }
+
+    /** Collapses all dimensions to a single linear total size.
+     *
+     * @return The total tensor size in terms of elements.
+     */
+    size_t total_size() const
+    {
+        return std::accumulate(_id.begin(), _id.end(), 1, std::multiplies<size_t>());
+    }
+    /** Collapses given dimension and above.
+     *
+     * @note Precondition: dimension < TensorShape::num_max_dimensions
+     *
+     * @param[in] dimension Index of the first dimension to collapse
+     *
+     * @return The linear size of the collapsed dimensions
+     */
+    size_t total_size_upper(size_t dimension) const
+    {
+        return std::accumulate(_id.begin() + dimension, _id.end(), 1, std::multiplies<size_t>());
+    }
+
+private:
+    /** Remove trailing dimensions of size 1 from the reported number of dimensions. */
+    void apply_dimension_correction()
+    {
+        for(int i = static_cast<int>(_num_dimensions) - 1; i >= 0; --i)
+        {
+            if(_id[i] == 1)
+            {
+                --_num_dimensions;
+            }
+            else
+            {
+                break;
+            }
+        }
+    }
+};
+}
+#endif /*__ARM_COMPUTE_TENSORSHAPE_H__*/
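Illustrative sketch for TensorShape (editorial addition): trailing dimensions of size 1 do not count towards num_dimensions(), and total_size() is the plain product of all dimensions.

    #include "arm_compute/core/TensorShape.h"

    void shape_example()
    {
        arm_compute::TensorShape s(13U, 8U, 1U); // reported as 2D: the trailing 1 is ignored
        // s.num_dimensions() == 2, s.total_size() == 104
        s.collapse(2);                           // merge the first two dimensions
        // s[0] == 104 now
    }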
diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h
new file mode 100644
index 0000000000..725567b9ae
--- /dev/null
+++ b/arm_compute/core/Types.h
@@ -0,0 +1,636 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TYPES_H__
+#define __ARM_COMPUTE_TYPES_H__
+
+#include "arm_compute/core/Coordinates.h"
+#include "arm_compute/core/TensorShape.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <string>
+#include <utility>
+
+namespace arm_compute
+{
+/** Image colour formats */
+enum class Format
+{
+    UNKNOWN,  /**< Unknown image format */
+    U8,       /**< 1 channel, 1 U8 per channel */
+    S16,      /**< 1 channel, 1 S16 per channel */
+    U16,      /**< 1 channel, 1 U16 per channel */
+    S32,      /**< 1 channel, 1 S32 per channel */
+    U32,      /**< 1 channel, 1 U32 per channel */
+    F16,      /**< 1 channel, 1 F16 per channel */
+    F32,      /**< 1 channel, 1 F32 per channel */
+    UV88,     /**< 2 channel, 1 U8 per channel */
+    RGB888,   /**< 3 channels, 1 U8 per channel */
+    RGBA8888, /**< 4 channels, 1 U8 per channel */
+    YUV444,   /**< A 3 plane of 8 bit 4:4:4 sampled Y, U, V planes */
+    YUYV422,  /**< A single plane of 32-bit macro pixel of Y0, U0, Y1, V0 bytes */
+    NV12,     /**< A 2 plane YUV format of Luma (Y) and interleaved UV data at 4:2:0 sampling */
+    NV21,     /**< A 2 plane YUV format of Luma (Y) and interleaved VU data at 4:2:0 sampling */
+    IYUV,     /**< A 3 plane of 8-bit 4:2:0 sampled Y, U, V planes */
+    UYVY422   /**< A single plane of 32-bit macro pixel of U0, Y0, V0, Y1 bytes */
+};
+
+/** Available data types */
+enum class DataType
+{
+    UNKNOWN,
+    U8,
+    S8,
+    QS8,
+    U16,
+    S16,
+    QS16,
+    U32,
+    S32,
+    U64,
+    S64,
+    F16,
+    F32,
+    F64,
+    SIZET
+};
+
+/** Constant value of the border pixels when using BorderMode::CONSTANT */
+constexpr uint8_t CONSTANT_BORDER_VALUE = 199;
+
+/** Constant value used to indicate a half-scale pyramid */
+constexpr float SCALE_PYRAMID_HALF = 0.5f;
+
+/** Constant value used to indicate an ORB scaled pyramid */
+constexpr float SCALE_PYRAMID_ORB = 8.408964152537146130583778358414e-01;
+
+/** Describes the anchor (start coordinates) and shape of a tensor's valid region */
+struct ValidRegion
+{
+    ValidRegion()
+        : anchor{}, shape{}
+    {
+    }
+
+    ValidRegion(const ValidRegion &) = default;
+    ValidRegion(ValidRegion &&)      = default;
+    ValidRegion &operator=(const ValidRegion &) = default;
+    ValidRegion &operator=(ValidRegion &&) = default;
+    ~ValidRegion()                         = default;
+
+    ValidRegion(Coordinates anchor, TensorShape shape)
+        : anchor{ anchor }, shape{ shape }
+    {
+    }
+
+    /** Return the start of the valid region for the given dimension @p d */
+    int start(unsigned int d) const
+    {
+        return anchor[d];
+    }
+
+    /** Return the end of the valid region for the given dimension @p d */
+    int end(unsigned int d) const
+    {
+        return anchor[d] + shape[d];
+    }
+
+    Coordinates anchor;
+    TensorShape shape;
+};
+
+/** Methods available to handle borders */
+enum class BorderMode
+{
+    UNDEFINED, /**< Borders are left undefined */
+    CONSTANT,  /**< Pixels outside the image are assumed to have a constant value */
+    REPLICATE  /**< Pixels outside the image are assumed to have the same value as the closest image pixel */
+};
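Illustrative sketch for ValidRegion (editorial addition): a 3x3 filter run with BorderMode::UNDEFINED on a 640x480 image leaves a one-pixel rim invalid, so the valid region starts at (1, 1) and spans 638x478.

    #include "arm_compute/core/Types.h"

    arm_compute::ValidRegion filter_valid_region()
    {
        arm_compute::ValidRegion region(arm_compute::Coordinates(1, 1),
                                        arm_compute::TensorShape(638U, 478U));
        // region.start(0) == 1, region.end(0) == 639
        return region;
    }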
+/** Container for 2D border size */
+struct BorderSize
+{
+    /** Empty border, i.e. no border */
+    constexpr BorderSize()
+        : top{ 0 }, right{ 0 }, bottom{ 0 }, left{ 0 }
+    {
+    }
+
+    /** Border with equal size around the 2D plane */
+    constexpr BorderSize(unsigned int size)
+        : top{ size }, right{ size }, bottom{ size }, left{ size }
+    {
+    }
+
+    /** Border with same size for top/bottom and left/right */
+    constexpr BorderSize(unsigned int top_bottom, unsigned int left_right)
+        : top{ top_bottom }, right{ left_right }, bottom{ top_bottom }, left{ left_right }
+    {
+    }
+
+    /** Border with different sizes */
+    constexpr BorderSize(unsigned int top, unsigned int right, unsigned int bottom, unsigned int left)
+        : top{ top }, right{ right }, bottom{ bottom }, left{ left }
+    {
+    }
+
+    /** Check if the entire border is zero */
+    constexpr bool empty() const
+    {
+        return top == 0 && right == 0 && bottom == 0 && left == 0;
+    }
+
+    /** Check if the border is the same size on all sides */
+    constexpr bool uniform() const
+    {
+        return top == right && top == bottom && top == left;
+    }
+
+    /** Scale the border size in place by the given factor */
+    BorderSize &operator*=(float scale)
+    {
+        top *= scale;
+        right *= scale;
+        bottom *= scale;
+        left *= scale;
+
+        return *this;
+    }
+
+    /** Return a copy of the border size scaled by the given factor */
+    BorderSize operator*(float scale)
+    {
+        BorderSize size = *this;
+        size *= scale;
+
+        return size;
+    }
+
+    /** Limit each side of this border to the corresponding side of @p limit */
+    void limit(const BorderSize &limit)
+    {
+        top    = std::min(top, limit.top);
+        right  = std::min(right, limit.right);
+        bottom = std::min(bottom, limit.bottom);
+        left   = std::min(left, limit.left);
+    }
+
+    unsigned int top;
+    unsigned int right;
+    unsigned int bottom;
+    unsigned int left;
+};
+
+using PaddingSize = BorderSize;
+
+/** Policy to handle overflow */
+enum class ConvertPolicy
+{
+    WRAP,    /**< Wrap around */
+    SATURATE /**< Saturate */
+};
+
+/** Interpolation method */
+enum class InterpolationPolicy
+{
+    NEAREST_NEIGHBOR, /**< Output values are defined to match the source pixel whose center is nearest to the sample position */
+    BILINEAR,         /**< Output values are defined by bilinear interpolation between the pixels */
+    AREA,             /**< Output values are determined by averaging the source pixels whose areas fall under the area of the destination pixel, projected onto the source image */
+};
+
+/** Bilinear Interpolation method used by LKTracker */
+enum class BilinearInterpolation
+{
+    BILINEAR_OLD_NEW,
+    BILINEAR_SCHARR
+};
+
+/** Threshold mode */
+enum class ThresholdType
+{
+    BINARY, /**< Threshold with one value */
+    RANGE   /**< Threshold with two values */
+};
+
+/** Rounding method */
+enum class RoundingPolicy
+{
+    TO_ZERO,        /**< Truncates the least significant values that are lost in operations. */
+    TO_NEAREST_UP,  /**< Rounds to nearest value; half rounds up */
+    TO_NEAREST_EVEN /**< Rounds to nearest value; half rounds to nearest even */
+};
+
+/** Termination criteria */
+enum class Termination
+{
+    TERM_CRITERIA_EPSILON,
+    TERM_CRITERIA_ITERATIONS,
+    TERM_CRITERIA_BOTH
+};
+
+/** Magnitude calculation type. */
+enum class MagnitudeType
+{
+    L1NORM, /**< L1 normalization type */
+    L2NORM  /**< L2 normalization type */
+};
+/** Phase calculation type.
+ *
+ * @note When PhaseType == SIGNED, each angle is mapped to the range 0 to 255 inclusive, otherwise only angles between 0 and 180 are mapped.
+ */
+enum class PhaseType
+{
+    SIGNED,  /**< Angle range: [0, 360] */
+    UNSIGNED /**< Angle range: [0, 180] */
+};
+
+/** Keypoint type */
+struct KeyPoint
+{
+    int32_t x{ 0 };               /**< X coordinates */
+    int32_t y{ 0 };               /**< Y coordinates */
+    float   strength{ 0.f };      /**< Strength of the point */
+    float   scale{ 0.f };         /**< Scale initialized to 0 by the corner detector */
+    float   orientation{ 0.f };   /**< Orientation initialized to 0 by the corner detector */
+    int32_t tracking_status{ 0 }; /**< Status initialized to 1 by the corner detector, set to 0 when the point is lost */
+    float   error{ 0.f };         /**< Tracking error initialized to 0 by the corner detector */
+};
+
+using InternalKeypoint = std::tuple<float, float, float>; /* x,y,strength */
+
+/** Rectangle type */
+struct Rectangle
+{
+    uint16_t x;      /**< Top-left x coordinate */
+    uint16_t y;      /**< Top-left y coordinate */
+    uint16_t width;  /**< Width of the rectangle */
+    uint16_t height; /**< Height of the rectangle */
+};
+
+/** Coordinate type */
+struct Coordinates2D
+{
+    int32_t x; /**< X coordinates */
+    int32_t y; /**< Y coordinates */
+};
+
+/** Coordinate type */
+struct Coordinates3D
+{
+    uint32_t x; /**< X coordinates */
+    uint32_t y; /**< Y coordinates */
+    uint32_t z; /**< Z coordinates */
+};
+
+/** Available channels */
+enum class Channel
+{
+    UNKNOWN, /**< Unknown channel format */
+    C0,      /**< First channel (used by formats with unknown channel types). */
+    C1,      /**< Second channel (used by formats with unknown channel types). */
+    C2,      /**< Third channel (used by formats with unknown channel types). */
+    C3,      /**< Fourth channel (used by formats with unknown channel types). */
+    R,       /**< Red channel. */
+    G,       /**< Green channel. */
+    B,       /**< Blue channel. */
+    A,       /**< Alpha channel. */
+    Y,       /**< Luma channel. */
+    U,       /**< Cb/U channel. */
+    V        /**< Cr/V/Value channel. */
+};
+
+/** Available matrix patterns */
+enum class MatrixPattern
+{
+    BOX,   /**< Box pattern matrix. */
+    CROSS, /**< Cross pattern matrix. */
+    DISK,  /**< Disk pattern matrix. */
+    OTHER  /**< Any other matrix pattern. */
+};
+
+/** Available non linear functions. */
+enum class NonLinearFilterFunction : unsigned
+{
+    MEDIAN = 0, /**< Non linear median filter. */
+    MIN    = 1, /**< Non linear erode. */
+    MAX    = 2, /**< Non linear dilate. */
+};
+
+/** The normalization type used for the normalization layer */
+enum class NormType
+{
+    IN_MAP_1D, /**< Normalization applied within the same map in 1D region */
+    IN_MAP_2D, /**< Normalization applied within the same map in 2D region */
+    CROSS_MAP  /**< Normalization applied cross maps */
+};
+
+/** Normalization type for Histogram of Oriented Gradients (HOG) */
+enum class HOGNormType
+{
+    L2_NORM    = 1, /**< L2-norm */
+    L2HYS_NORM = 2, /**< L2-norm followed by clipping */
+    L1_NORM    = 3  /**< L1 norm */
+};
+/** Detection window used for object detection. The detection window keeps the following information:
+ *
+ * -# Geometry of the rectangular window (x/y of top-left corner and width/height)
+ * -# Index of the class used for evaluating which class the detection window belongs to
+ * -# Confidence value (score) obtained with the classifier
+ */
+struct DetectionWindow
+{
+    uint16_t x{ 0 };         /**< Top-left x coordinate */
+    uint16_t y{ 0 };         /**< Top-left y coordinate */
+    uint16_t width{ 0 };     /**< Width of the detection window */
+    uint16_t height{ 0 };    /**< Height of the detection window */
+    uint16_t idx_class{ 0 }; /**< Index of the class */
+    float    score{ 0.f };   /**< Confidence value for the detection window */
+};
+
+/** Dimension rounding type when down-scaling on CNNs
+ * @note Used in pooling and convolution layer
+ */
+enum class DimensionRoundingType
+{
+    FLOOR, /**< Floor rounding */
+    CEIL   /**< Ceil rounding */
+};
+
+/** Available pooling types */
+enum class PoolingType
+{
+    MAX, /**< Max Pooling */
+    AVG  /**< Average Pooling */
+};
+
+/** Padding and stride information class */
+class PadStrideInfo
+{
+public:
+    /** Constructor
+     *
+     * @param[in] stride_x (Optional) Stride, in elements, across x. Defaults to 1.
+     * @param[in] stride_y (Optional) Stride, in elements, across y. Defaults to 1.
+     * @param[in] pad_x    (Optional) Padding, in elements, across x. Defaults to 0.
+     * @param[in] pad_y    (Optional) Padding, in elements, across y. Defaults to 0.
+     * @param[in] round    (Optional) Dimensions rounding. Defaults to @ref DimensionRoundingType::FLOOR.
+     */
+    PadStrideInfo(unsigned int stride_x = 1, unsigned int stride_y = 1,
+                  unsigned int pad_x = 0, unsigned int pad_y = 0,
+                  DimensionRoundingType round = DimensionRoundingType::FLOOR)
+        : _stride(std::make_pair(stride_x, stride_y)),
+          _pad(std::make_pair(pad_x, pad_y)),
+          _round_type(round)
+    {
+    }
+    std::pair<unsigned int, unsigned int> stride() const
+    {
+        return _stride;
+    }
+    std::pair<unsigned int, unsigned int> pad() const
+    {
+        return _pad;
+    }
+    DimensionRoundingType round() const
+    {
+        return _round_type;
+    }
+
+private:
+    std::pair<unsigned int, unsigned int> _stride;
+    std::pair<unsigned int, unsigned int> _pad;
+    DimensionRoundingType                 _round_type;
+};
+
+/** Pooling Layer Information class */
+class PoolingLayerInfo
+{
+public:
+    /** Default Constructor
+     *
+     * @param[in] pool_type       Pooling type @ref PoolingType. Defaults to @ref PoolingType::MAX
+     * @param[in] pool_size       (Optional) Pooling size, in elements, across x and y. Defaults to 2.
+     * @param[in] pad_stride_info (Optional) Padding and stride information @ref PadStrideInfo
+     */
+    PoolingLayerInfo(PoolingType pool_type = PoolingType::MAX, unsigned int pool_size = 2, PadStrideInfo pad_stride_info = PadStrideInfo())
+        : _pool_type(pool_type), _pool_size(pool_size), _pad_stride_info(pad_stride_info)
+    {
+    }
+    PoolingType pool_type() const
+    {
+        return _pool_type;
+    }
+    unsigned int pool_size() const
+    {
+        return _pool_size;
+    }
+    PadStrideInfo pad_stride_info() const
+    {
+        return _pad_stride_info;
+    }
+
+private:
+    PoolingType   _pool_type;
+    unsigned int  _pool_size;
+    PadStrideInfo _pad_stride_info;
+};
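Illustrative sketch for PadStrideInfo and PoolingLayerInfo (editorial addition): 3x3 max pooling with stride 2 and 1-pixel padding. For a 112-wide input this gives floor((112 + 2*1 - 3) / 2) + 1 = 56 output columns with FLOOR rounding; the output-size formula is assumed from common CNN conventions, not taken from this header.

    #include "arm_compute/core/Types.h"

    arm_compute::PoolingLayerInfo make_pool_info()
    {
        using namespace arm_compute;
        return PoolingLayerInfo(PoolingType::MAX, 3 /* pool_size */,
                                PadStrideInfo(2, 2, 1, 1, DimensionRoundingType::FLOOR));
    }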
+
+/** Activation Layer Information class */
+class ActivationLayerInfo
+{
+public:
+    /** Available activation functions */
+    enum class ActivationFunction
+    {
+        LOGISTIC,     /**< Logistic */
+        TANH,         /**< Hyperbolic tangent */
+        RELU,         /**< Rectifier */
+        BOUNDED_RELU, /**< Bounded Rectifier */
+        SOFT_RELU,    /**< Soft Rectifier */
+        ABS,          /**< Absolute */
+        SQUARE,       /**< Square */
+        SQRT,         /**< Square root */
+        LINEAR        /**< Linear */
+    };
+
+    /** Default Constructor
+     *
+     * @param[in] f The activation function to use.
+     * @param[in] a (Optional) The alpha parameter used by some activation functions
+     *              (@ref ActivationFunction::BOUNDED_RELU, @ref ActivationFunction::LINEAR, @ref ActivationFunction::TANH).
+     * @param[in] b (Optional) The beta parameter used by some activation functions (@ref ActivationFunction::LINEAR, @ref ActivationFunction::TANH).
+     */
+    ActivationLayerInfo(ActivationFunction f, float a = 0.0f, float b = 0.0f)
+        : _act(f), _a(a), _b(b)
+    {
+    }
+    ActivationFunction activation() const
+    {
+        return _act;
+    }
+    float a() const
+    {
+        return _a;
+    }
+    float b() const
+    {
+        return _b;
+    }
+
+private:
+    ActivationFunction _act;
+    float              _a;
+    float              _b;
+};
+
+/** Normalization Layer Information class */
+class NormalizationLayerInfo
+{
+public:
+    /** Default Constructor
+     *
+     * @param[in] type      The normalization type. Can be @ref NormType::IN_MAP_1D, @ref NormType::IN_MAP_2D or @ref NormType::CROSS_MAP
+     * @param[in] norm_size The normalization size is the number of elements to normalize across. Defaults to 5.
+     * @param[in] alpha     Alpha parameter used by the normalization equation. Defaults to 0.0001.
+     * @param[in] beta      Beta parameter used by the normalization equation. Defaults to 0.5.
+     * @param[in] kappa     Kappa parameter used by the [Krizhevsky 2012] Across Channel Local Brightness Normalization equation. Defaults to 1.
+     */
+    NormalizationLayerInfo(NormType type, uint32_t norm_size = 5, float alpha = 0.0001f, float beta = 0.5f, float kappa = 1.f)
+        : _type(type), _norm_size(norm_size), _alpha(alpha), _beta(beta), _kappa(kappa)
+    {
+    }
+    NormType type() const
+    {
+        return _type;
+    }
+    uint32_t norm_size() const
+    {
+        return _norm_size;
+    }
+    float alpha() const
+    {
+        return _alpha;
+    }
+    float beta() const
+    {
+        return _beta;
+    }
+    float kappa() const
+    {
+        return _kappa;
+    }
+    /** Return the scaling factor of the normalization function.
+     *
+     * If kappa is not 1 then [Krizhevsky 2012] normalization scaling is specified. The scaling
+     * factor takes into account the total number of elements used for the normalization, so in
+     * the 2D case this is _norm_size^2.
+     *
+     * @return The normalization scaling factor.
+     */
+    float scale_coeff() const
+    {
+        const uint32_t size = (_type == NormType::IN_MAP_2D) ? _norm_size * _norm_size : _norm_size;
+        return (_kappa == 1.f) ? (_alpha / size) : _alpha;
+    }
+
+private:
+    NormType _type;
+    uint32_t _norm_size;
+    float    _alpha;
+    float    _beta;
+    float    _kappa;
+};
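+
+/* Worked example (illustrative only): with type == NormType::IN_MAP_2D and the
+ * defaults norm_size == 5, alpha == 0.0001 and kappa == 1, scale_coeff()
+ * normalizes over a 5x5 = 25 element region and returns alpha / 25 = 4e-6.
+ * With kappa != 1 the [Krizhevsky 2012] variant applies and alpha is returned
+ * unscaled.
+ *
+ *   NormalizationLayerInfo norm_info(NormType::IN_MAP_2D);
+ *   float coeff = norm_info.scale_coeff(); // 0.0001f / 25
+ */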
+
+/** Convolution Layer Weights Information class */
+class WeightsInfo
+{
+public:
+    WeightsInfo()
+        : _are_reshaped(false), _kernel_size(0)
+    {
+    }
+    /** Constructor
+     *
+     * @param[in] are_reshaped True if the weights have been reshaped
+     * @param[in] kernel_size  The size of the kernel.
+     */
+    WeightsInfo(bool are_reshaped, unsigned int kernel_size)
+        : _are_reshaped(are_reshaped), _kernel_size(kernel_size)
+    {
+    }
+
+    bool are_reshaped() const
+    {
+        return _are_reshaped;
+    };
+    unsigned int kernel_size() const
+    {
+        return _kernel_size;
+    }
+
+private:
+    const bool         _are_reshaped;
+    const unsigned int _kernel_size;
+};
+
+/** IO formatting information class */
+struct IOFormatInfo
+{
+    /** Precision type used when printing floating point numbers */
+    enum class PrecisionType
+    {
+        Default, /**< Default precision to the one that the current stream has */
+        Custom,  /**< Custom precision specified by the user using the precision parameter */
+        Full     /**< The maximum precision of the floating point representation */
+    };
+
+    /** Specifies the area to be printed, used by Tensor objects */
+    enum class PrintRegion
+    {
+        ValidRegion, /**< Prints the valid region of the Tensor object */
+        NoPadding,   /**< Prints the Tensor object without the padding */
+        Full         /**< Prints the Tensor object including padding */
+    };
+
+    IOFormatInfo(PrintRegion   print_region   = PrintRegion::ValidRegion,
+                 PrecisionType precision_type = PrecisionType::Default,
+                 unsigned int  precision      = 10,
+                 bool          align_columns  = true,
+                 std::string   element_delim  = " ",
+                 std::string   row_delim      = "\n")
+        : print_region(print_region),
+          precision_type(precision_type),
+          precision(precision),
+          element_delim(element_delim),
+          row_delim(row_delim),
+          align_columns(align_columns)
+    {
+    }
+
+    PrintRegion   print_region;
+    PrecisionType precision_type;
+    unsigned int  precision;
+    std::string   element_delim;
+    std::string   row_delim;
+    bool          align_columns;
+};
+}
+#endif /* __ARM_COMPUTE_TYPES_H__ */
diff --git a/arm_compute/core/Utils.h b/arm_compute/core/Utils.h
new file mode 100644
index 0000000000..9d3ff0a1bd
--- /dev/null
+++ b/arm_compute/core/Utils.h
@@ -0,0 +1,740 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_UTILS_H__
+#define __ARM_COMPUTE_UTILS_H__
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Types.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <cstdlib>
+#include <numeric>
+#include <sstream>
+#include <string>
+#include <type_traits>
+#include <utility>
+
+namespace arm_compute
+{
+/** Computes the smallest number larger or equal to value that is a multiple of divisor. */
+template <typename S, typename T>
+inline auto ceil_to_multiple(S value, T divisor) -> decltype(((value + divisor - 1) / divisor) * divisor)
+{
+    ARM_COMPUTE_ERROR_ON(value < 0 || divisor <= 0);
+    return ((value + divisor - 1) / divisor) * divisor;
+}
+
+/** Computes the largest number smaller or equal to value that is a multiple of divisor. */
+template <typename S, typename T>
+inline auto floor_to_multiple(S value, T divisor) -> decltype((value / divisor) * divisor)
+{
+    ARM_COMPUTE_ERROR_ON(value < 0 || divisor <= 0);
+    return (value / divisor) * divisor;
+}
+
+/** Calculate the rounded up quotient of val / m. */
+template <typename S, typename T>
+constexpr auto DIV_CEIL(S val, T m) -> decltype((val + m - 1) / m)
+{
+    return (val + m - 1) / m;
+}
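+
+/* Worked example (illustrative only): for value 17 and divisor 8,
+ * ceil_to_multiple(17, 8) == 24, floor_to_multiple(17, 8) == 16 and
+ * DIV_CEIL(17, 8) == 3, since 17 = 2 * 8 + 1 rounds up to three blocks of 8.
+ */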
+
+/** Returns the arm_compute library build information
+ *
+ * Contains the version number and the build options used to build the library
+ *
+ * @return The arm_compute library build information
+ */
+std::string build_information();
+
+/** Load an entire file in memory
+ *
+ * @param[in] filename Name of the file to read.
+ * @param[in] binary   True if the file is binary.
+ *
+ * @return The content of the file.
+ */
+std::string read_file(const std::string &filename, bool binary);
+
+/** Return a value as a string
+ *
+ * @param[in] val Input value.
+ *
+ * @return Value represented as a string
+ */
+template <typename T>
+const std::string val_to_string(T val)
+{
+    return static_cast<const std::ostringstream &>(std::ostringstream() << val).str();
+}
+
+/** The size in bytes of the data type
+ *
+ * @param[in] data_type Input data type
+ *
+ * @return The size in bytes of the data type
+ */
+inline size_t data_size_from_type(DataType data_type)
+{
+    switch(data_type)
+    {
+        case DataType::U8:
+        case DataType::S8:
+        case DataType::QS8:
+            return 1;
+        case DataType::U16:
+        case DataType::S16:
+        case DataType::F16:
+        case DataType::QS16:
+            return 2;
+        case DataType::F32:
+        case DataType::U32:
+        case DataType::S32:
+            return 4;
+        case DataType::F64:
+        case DataType::U64:
+        case DataType::S64:
+            return 8;
+        case DataType::SIZET:
+            return sizeof(size_t);
+        default:
+            ARM_COMPUTE_ERROR("Invalid data type");
+            return 0;
+    }
+}
+
+/** The size in bytes of the pixel format
+ *
+ * @param[in] format Input format
+ *
+ * @return The size in bytes of the pixel format
+ */
+inline size_t pixel_size_from_format(Format format)
+{
+    switch(format)
+    {
+        case Format::U8:
+            return 1;
+        case Format::U16:
+        case Format::S16:
+        case Format::F16:
+        case Format::UV88:
+        case Format::YUYV422:
+        case Format::UYVY422:
+            return 2;
+        case Format::RGB888:
+            return 3;
+        case Format::RGBA8888:
+            return 4;
+        case Format::U32:
+        case Format::S32:
+        case Format::F32:
+            return 4;
+        //Doesn't make sense for planar formats:
+        case Format::NV12:
+        case Format::NV21:
+        case Format::IYUV:
+        case Format::YUV444:
+        default:
+            ARM_COMPUTE_ERROR("Undefined pixel size for given format");
+            return 0;
+    }
+}
+
+/** The size in bytes of the data type
+ *
+ * @param[in] dt Input data type
+ *
+ * @return The size in bytes of the data type
+ */
+inline size_t element_size_from_data_type(DataType dt)
+{
+    switch(dt)
+    {
+        case DataType::S8:
+        case DataType::U8:
+        case DataType::QS8:
+            return 1;
+        case DataType::U16:
+        case DataType::S16:
+        case DataType::QS16:
+        case DataType::F16:
+            return 2;
+        case DataType::U32:
+        case DataType::S32:
+        case DataType::F32:
+            return 4;
+        default:
+            ARM_COMPUTE_ERROR("Undefined element size for given data type");
+            return 0;
+    }
+}
+
+/** Return the data type used by a given single-planar pixel format
+ *
+ * @param[in] format Input format
+ *
+ * @return The data type used by the given format
+ */
+inline DataType data_type_from_format(Format format)
+{
+    switch(format)
+    {
+        case Format::U8:
+        case Format::UV88:
+        case Format::RGB888:
+        case Format::RGBA8888:
+        case Format::YUYV422:
+        case Format::UYVY422:
+            return DataType::U8;
+        case Format::U16:
+            return DataType::U16;
+        case Format::S16:
+            return DataType::S16;
+        case Format::U32:
+            return DataType::U32;
+        case Format::S32:
+            return DataType::S32;
+        case Format::F16:
+            return DataType::F16;
+        case Format::F32:
+            return DataType::F32;
+        //Doesn't make sense for planar formats:
+        case Format::NV12:
+        case Format::NV21:
+        case Format::IYUV:
+        case Format::YUV444:
+        default:
+            ARM_COMPUTE_ERROR("Not supported data_type for given format");
+            return DataType::UNKNOWN;
+    }
+}
+
+/** Return the plane index of a given channel given an input format.
+ *
+ * @param[in] format  Input format
+ * @param[in] channel Input channel
+ *
+ * @return The plane index of the specific channel of the specific format
+ */
+inline int plane_idx_from_channel(Format format, Channel channel)
+{
+    switch(format)
+    {
+        case Format::NV12:
+        case Format::NV21:
+        {
+            switch(channel)
+            {
+                case Channel::Y:
+                    return 0;
+                case Channel::U:
+                case Channel::V:
+                    return 1;
+                default:
+                    ARM_COMPUTE_ERROR("Not supported channel");
+                    return 0;
+            }
+        }
+        case Format::IYUV:
+        case Format::YUV444:
+        {
+            switch(channel)
+            {
+                case Channel::Y:
+                    return 0;
+                case Channel::U:
+                    return 1;
+                case Channel::V:
+                    return 2;
+                default:
+                    ARM_COMPUTE_ERROR("Not supported channel");
+                    return 0;
+            }
+        }
+        default:
+            ARM_COMPUTE_ERROR("Not supported format");
+            return 0;
+    }
+}
+
+/** Return the number of planes for a given format
+ *
+ * @param[in] format Input format
+ *
+ * @return The number of planes for a given image format.
+ */
+inline size_t num_planes_from_format(Format format)
+{
+    switch(format)
+    {
+        case Format::U8:
+        case Format::S16:
+        case Format::U16:
+        case Format::S32:
+        case Format::U32:
+        case Format::F16:
+        case Format::F32:
+        case Format::RGB888:
+        case Format::RGBA8888:
+        case Format::YUYV422:
+        case Format::UYVY422:
+            return 1;
+        case Format::NV12:
+        case Format::NV21:
+            return 2;
+        case Format::IYUV:
+        case Format::YUV444:
+            return 3;
+        default:
+            ARM_COMPUTE_ERROR("Not supported format");
+            return 0;
+    }
+}
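+
+/* Illustrative sketch: NV12 stores Y in plane 0 and the interleaved U/V pair in
+ * plane 1, so the helpers above give:
+ *
+ *   num_planes_from_format(Format::NV12);               // 2
+ *   plane_idx_from_channel(Format::NV12, Channel::U);   // 1
+ *   plane_idx_from_channel(Format::YUV444, Channel::V); // 2
+ */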
+
+/** Return the number of channels for a given single-planar pixel format
+ *
+ * @param[in] format Input format
+ *
+ * @return The number of channels for a given image format.
+ */
+inline size_t num_channels_from_format(Format format)
+{
+    switch(format)
+    {
+        case Format::U8:
+        case Format::U16:
+        case Format::S16:
+        case Format::U32:
+        case Format::S32:
+        case Format::F16:
+        case Format::F32:
+            return 1;
+        // Because the U and V channels are subsampled
+        // these formats appear like having only 2 channels:
+        case Format::YUYV422:
+        case Format::UYVY422:
+            return 2;
+        case Format::UV88:
+            return 2;
+        case Format::RGB888:
+            return 3;
+        case Format::RGBA8888:
+            return 4;
+        //Doesn't make sense for planar formats:
+        case Format::NV12:
+        case Format::NV21:
+        case Format::IYUV:
+        case Format::YUV444:
+        default:
+            return 0;
+    }
+}
+
+/** Separate a 2D convolution into two 1D convolutions
+ *
+ * @param[in]  conv     2D convolution
+ * @param[out] conv_col 1D vertical convolution
+ * @param[out] conv_row 1D horizontal convolution
+ * @param[in]  size     Size of the 2D convolution
+ *
+ * @return true if the separation was successful
+ */
+inline bool separate_matrix(const int16_t *conv, int16_t *conv_col, int16_t *conv_row, uint8_t size)
+{
+    int32_t min_col     = -1;
+    int16_t min_col_val = -1;
+
+    for(int32_t i = 0; i < size; ++i)
+    {
+        if(conv[i] != 0 && (min_col < 0 || abs(min_col_val) > abs(conv[i])))
+        {
+            min_col     = i;
+            min_col_val = conv[i];
+        }
+    }
+
+    if(min_col < 0)
+    {
+        return false;
+    }
+
+    for(uint32_t j = 0; j < size; ++j)
+    {
+        conv_col[j] = conv[min_col + j * size];
+    }
+
+    for(uint32_t i = 0; i < size; ++i)
+    {
+        if(static_cast<int>(i) == min_col)
+        {
+            conv_row[i] = 1;
+        }
+        else
+        {
+            const int16_t coeff = conv[i] / conv[min_col];
+
+            for(uint32_t j = 1; j < size; ++j)
+            {
+                if(conv[i + j * size] != (conv_col[j] * coeff))
+                {
+                    return false;
+                }
+            }
+
+            conv_row[i] = coeff;
+        }
+    }
+
+    return true;
+}
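+
+/* Worked example (illustrative only): the 3x3 Sobel Gx matrix is separable into
+ * conv_col = { -1, -2, -1 } and conv_row = { 1, 0, -1 }, whose outer product
+ * reconstructs the original matrix:
+ *
+ *   int16_t conv[9] = { -1, 0, 1, -2, 0, 2, -1, 0, 1 };
+ *   int16_t col[3], row[3];
+ *   bool separable = separate_matrix(conv, col, row, 3); // true
+ */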
+
+/** Calculate the scale of the given square matrix
+ *
+ * The scale is the absolute value of the sum of all the coefficients in the matrix.
+ *
+ * @note If the coefficients add up to 0 then the scale is set to 1.
+ *
+ * @param[in] matrix      Matrix coefficients
+ * @param[in] matrix_size Number of elements per side of the square matrix. (Number of coefficients = matrix_size * matrix_size).
+ *
+ * @return The absolute value of the sum of the coefficients if they don't add up to 0, otherwise 1.
+ */
+inline uint32_t calculate_matrix_scale(const int16_t *matrix, unsigned int matrix_size)
+{
+    const size_t size = matrix_size * matrix_size;
+
+    return std::max(1, std::abs(std::accumulate(matrix, matrix + size, 0)));
+}
+
+/** Calculate the accuracy required by the horizontal and vertical convolution computations
+ *
+ * @param[in] conv_col Pointer to the vertical vector of the separated convolution filter
+ * @param[in] conv_row Pointer to the horizontal vector of the convolution filter
+ * @param[in] size     Number of elements per vector of the separated matrix
+ *
+ * @return The return type is a pair. The first element of the pair is the biggest data type needed for the first stage. The second
+ *         element of the pair is the biggest data type needed for the second stage.
+ */
+inline std::pair<DataType, DataType> data_type_for_convolution(const int16_t *conv_col, const int16_t *conv_row, size_t size)
+{
+    DataType first_stage  = DataType::UNKNOWN;
+    DataType second_stage = DataType::UNKNOWN;
+
+    auto gez = [](const int16_t &v)
+    {
+        return v >= 0;
+    };
+
+    auto accu_neg = [](const int &first, const int &second)
+    {
+        return first + (second < 0 ? second : 0);
+    };
+
+    auto accu_pos = [](const int &first, const int &second)
+    {
+        return first + (second > 0 ? second : 0);
+    };
+
+    const bool only_positive_coefficients = std::all_of(conv_row, conv_row + size, gez) && std::all_of(conv_col, conv_col + size, gez);
+
+    if(only_positive_coefficients)
+    {
+        const int max_row_value = std::accumulate(conv_row, conv_row + size, 0) * UINT8_MAX;
+        const int max_value     = std::accumulate(conv_col, conv_col + size, 0) * max_row_value;
+
+        first_stage = (max_row_value <= UINT16_MAX) ? DataType::U16 : DataType::S32;
+
+        second_stage = (max_value <= UINT16_MAX) ? DataType::U16 : DataType::S32;
+    }
+    else
+    {
+        const int min_row_value  = std::accumulate(conv_row, conv_row + size, 0, accu_neg) * UINT8_MAX;
+        const int max_row_value  = std::accumulate(conv_row, conv_row + size, 0, accu_pos) * UINT8_MAX;
+        const int neg_coeffs_sum = std::accumulate(conv_col, conv_col + size, 0, accu_neg);
+        const int pos_coeffs_sum = std::accumulate(conv_col, conv_col + size, 0, accu_pos);
+        const int min_value      = neg_coeffs_sum * max_row_value + pos_coeffs_sum * min_row_value;
+        const int max_value      = neg_coeffs_sum * min_row_value + pos_coeffs_sum * max_row_value;
+
+        first_stage = ((INT16_MIN <= min_row_value) && (max_row_value <= INT16_MAX)) ? DataType::S16 : DataType::S32;
+
+        second_stage = ((INT16_MIN <= min_value) && (max_value <= INT16_MAX)) ? DataType::S16 : DataType::S32;
+    }
+
+    return std::make_pair(first_stage, second_stage);
+}
+
+/** Calculate the accuracy required by the square convolution calculation.
+ *
+ * @param[in] conv Pointer to the square convolution matrix
+ * @param[in] size The total size of the convolution matrix
+ *
+ * @return The biggest data type needed to do the convolution
+ */
+inline DataType data_type_for_convolution_matrix(const int16_t *conv, size_t size)
+{
+    auto gez = [](const int16_t v)
+    {
+        return v >= 0;
+    };
+
+    const bool only_positive_coefficients = std::all_of(conv, conv + size, gez);
+
+    if(only_positive_coefficients)
+    {
+        const int max_conv_value = std::accumulate(conv, conv + size, 0) * UINT8_MAX;
+        if(max_conv_value <= UINT16_MAX)
+        {
+            return DataType::U16;
+        }
+        else
+        {
+            return DataType::S32;
+        }
+    }
+    else
+    {
+        const int min_value = std::accumulate(conv, conv + size, 0, [](int a, int b)
+        {
+            return b < 0 ? a + b : a;
+        })
+        * UINT8_MAX;
+
+        const int max_value = std::accumulate(conv, conv + size, 0, [](int a, int b)
+        {
+            return b > 0 ? a + b : a;
+        })
+        * UINT8_MAX;
+
+        if((INT16_MIN <= min_value) && (INT16_MAX >= max_value))
+        {
+            return DataType::S16;
+        }
+        else
+        {
+            return DataType::S32;
+        }
+    }
+}
+
+/** Returns expected width and height of output scaled tensor depending on dimensions rounding mode.
+ *
+ * @param[in] width       Width of input tensor (Number of columns)
+ * @param[in] height      Height of input tensor (Number of rows)
+ * @param[in] kernel_size Kernel size.
+ * @param[in] stride_x    Stride of the operation in the x dimension.
+ * @param[in] stride_y    Stride of the operation in the y dimension.
+ * @param[in] pad_x       Padding size in the x dimension.
+ * @param[in] pad_y       Padding size in the y dimension.
+ * @param[in] round_type  Dimensions rounding mode.
+ *
+ * @return A pair with the new width in the first position and the new height in the second.
+ */
+const std::pair<unsigned int, unsigned int> scaled_dimensions(unsigned int width, unsigned int height, unsigned int kernel_size,
+                                                              unsigned int stride_x, unsigned int stride_y,
+                                                              unsigned int pad_x, unsigned int pad_y,
+                                                              DimensionRoundingType round_type);
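+
+/* Illustrative sketch, assuming the usual CNN down-scaling formula
+ * out = (in + 2 * pad - kernel) / stride + 1 with the requested rounding:
+ * a 224x224 input, 7x7 kernel, stride 2 and padding 3 would give
+ *
+ *   auto out = scaled_dimensions(224, 224, 7, 2, 2, 3, 3, DimensionRoundingType::FLOOR);
+ *   // out.first == 112, out.second == 112
+ */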
+
+/** Convert a tensor format into a string.
+ *
+ * @param[in] format @ref Format to be translated to string.
+ *
+ * @return The string describing the format.
+ */
+const std::string &string_from_format(Format format);
+
+/** Convert a channel identity into a string.
+ *
+ * @param[in] channel @ref Channel to be translated to string.
+ *
+ * @return The string describing the channel.
+ */
+const std::string &string_from_channel(Channel channel);
+
+/** Convert a data type identity into a string.
+ *
+ * @param[in] dt @ref DataType to be translated to string.
+ *
+ * @return The string describing the data type.
+ */
+const std::string &string_from_data_type(DataType dt);
+
+/** Convert a matrix pattern into a string.
+ *
+ * @param[in] pattern @ref MatrixPattern to be translated to string.
+ *
+ * @return The string describing the matrix pattern.
+ */
+const std::string &string_from_matrix_pattern(MatrixPattern pattern);
+
+/** Translate a given activation function to a string.
+ *
+ * @param[in] act @ref ActivationLayerInfo::ActivationFunction to be translated to string.
+ *
+ * @return The string describing the activation function.
+ */
+const std::string &string_from_activation_func(ActivationLayerInfo::ActivationFunction act);
+
+/** Translate a given non linear function to a string.
+ *
+ * @param[in] function @ref NonLinearFilterFunction to be translated to string.
+ *
+ * @return The string describing the non linear function.
+ */
+const std::string &string_from_non_linear_filter_function(NonLinearFilterFunction function);
+
+/** Translate a given interpolation policy to a string.
+ *
+ * @param[in] policy @ref InterpolationPolicy to be translated to string.
+ *
+ * @return The string describing the interpolation policy.
+ */
+const std::string &string_from_interpolation_policy(InterpolationPolicy policy);
+
+/** Translate a given border mode policy to a string.
+ *
+ * @param[in] border_mode @ref BorderMode to be translated to string.
+ *
+ * @return The string describing the border mode.
+ */
+const std::string &string_from_border_mode(BorderMode border_mode);
+
+/** Translate a given normalization type to a string.
+ *
+ * @param[in] type @ref NormType to be translated to string.
+ *
+ * @return The string describing the normalization type.
+ */
+const std::string &string_from_norm_type(NormType type);
+
+/** Lower a given string.
+ *
+ * @param[in] val Given string to lower.
+ *
+ * @return The lowered string
+ */
+std::string lower_string(const std::string &val);
+
+/** Check if a given data type is of floating point type
+ *
+ * @param[in] dt Input data type.
+ *
+ * @return True if data type is of floating point type, else false.
+ */
+inline bool is_data_type_float(DataType dt)
+{
+    switch(dt)
+    {
+        case DataType::F16:
+        case DataType::F32:
+            return true;
+        default:
+            return false;
+    }
+}
+
+/** Check if a given data type is of fixed point type
+ *
+ * @param[in] dt Input data type.
+ *
+ * @return True if data type is of fixed point type, else false.
+ */
+inline bool is_data_type_fixed_point(DataType dt)
+{
+    switch(dt)
+    {
+        case DataType::QS8:
+        case DataType::QS16:
+            return true;
+        default:
+            return false;
+    }
+}
+
+/** Print consecutive elements to an output stream.
+ *
+ * @param[out] s             Output stream to print the elements to.
+ * @param[in]  ptr           Pointer to print the elements from.
+ * @param[in]  n             Number of elements to print.
+ * @param[in]  stream_width  (Optional) Width of the stream. If set to 0 the element's width is used. Defaults to 0.
+ * @param[in]  element_delim (Optional) Delimiter between consecutive elements. Defaults to a single space.
+ */
+template <typename T>
+void print_consecutive_elements_impl(std::ostream &s, const T *ptr, unsigned int n, int stream_width = 0, const std::string &element_delim = " ")
+{
+    using print_type = typename std::conditional<std::is_floating_point<T>::value, T, int>::type;
+
+    for(unsigned int i = 0; i < n; ++i)
+    {
+        // Set stream width as it is not a "sticky" stream manipulator
+        if(stream_width != 0)
+        {
+            s.width(stream_width);
+        }
+        s << std::right << static_cast<print_type>(ptr[i]) << element_delim;
+    }
+}
+
+/** Identify the maximum width of n consecutive elements.
+ *
+ * @param[in] s   The output stream which will be used to print the elements. Used to extract the stream format.
+ * @param[in] ptr Pointer to the elements.
+ * @param[in] n   Number of elements.
+ *
+ * @return The maximum width of the elements.
+ */
+template <typename T>
+int max_consecutive_elements_display_width_impl(std::ostream &s, const T *ptr, unsigned int n)
+{
+    using print_type = typename std::conditional<std::is_floating_point<T>::value, T, int>::type;
+
+    int max_width = -1;
+    for(unsigned int i = 0; i < n; ++i)
+    {
+        std::stringstream ss;
+        ss.copyfmt(s);
+        ss << static_cast<print_type>(ptr[i]);
+        max_width = std::max<int>(max_width, ss.str().size());
+    }
+    return max_width;
+}
+
+/** Print consecutive elements to an output stream.
+ *
+ * @param[out] s             Output stream to print the elements to.
+ * @param[in]  dt            Data type of the elements
+ * @param[in]  ptr           Pointer to print the elements from.
+ * @param[in]  n             Number of elements to print.
+ * @param[in]  stream_width  (Optional) Width of the stream. If set to 0 the element's width is used. Defaults to 0.
+ * @param[in]  element_delim (Optional) Delimiter between consecutive elements. Defaults to a single space.
+ */
+void print_consecutive_elements(std::ostream &s, DataType dt, const uint8_t *ptr, unsigned int n, int stream_width, const std::string &element_delim = " ");
+
+/** Identify the maximum width of n consecutive elements.
+ *
+ * @param[in] s   Output stream used to extract the stream format.
+ * @param[in] dt  Data type of the elements
+ * @param[in] ptr Pointer to the elements.
+ * @param[in] n   Number of elements.
+ *
+ * @return The maximum width of the elements.
+ */
+int max_consecutive_elements_display_width(std::ostream &s, DataType dt, const uint8_t *ptr, unsigned int n);
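+
+/* Usage sketch (illustrative only): print four U8 elements separated by commas.
+ *
+ *   const uint8_t data[4] = { 1, 2, 3, 4 };
+ *   print_consecutive_elements(std::cout, DataType::U8, data, 4, 0, ", ");
+ */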
+}
+#endif /*__ARM_COMPUTE_UTILS_H__ */
diff --git a/arm_compute/core/Validate.h b/arm_compute/core/Validate.h
new file mode 100644
index 0000000000..48eba70adf
--- /dev/null
+++ b/arm_compute/core/Validate.h
@@ -0,0 +1,563 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_VALIDATE_H__
+#define __ARM_COMPUTE_VALIDATE_H__
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/HOGInfo.h"
+#include "arm_compute/core/IKernel.h"
+#include "arm_compute/core/IMultiHOG.h"
+#include "arm_compute/core/IMultiImage.h"
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/MultiImageInfo.h"
+#include "arm_compute/core/Window.h"
+
+#include <algorithm>
+
+namespace arm_compute
+{
+namespace detail
+{
+/** Check whether two dimension objects differ.
+ *
+ * @param[in] dim1      First object to be compared.
+ * @param[in] dim2      Second object to be compared.
+ * @param[in] upper_dim The dimension from which to check.
+ *
+ * @return Return true if the two objects are different.
+ */
+template <typename T>
+inline bool have_different_dimensions(const Dimensions<T> &dim1, const Dimensions<T> &dim2, unsigned int upper_dim)
+{
+    for(unsigned int i = upper_dim; i < arm_compute::Dimensions<T>::num_max_dimensions; ++i)
+    {
+        if(dim1[i] != dim2[i])
+        {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+/** Functor to compare two @ref Dimensions objects and throw an error on mismatch.
+ *
+ * @param[in] dim      Object to compare against.
+ * @param[in] function Function in which the error occurred.
+ * @param[in] file     File in which the error occurred.
+ * @param[in] line     Line in which the error occurred.
+ */
+template <typename T>
+class compare_dimension
+{
+public:
+    compare_dimension(const Dimensions<T> &dim, const char *function, const char *file, int line)
+        : _dim{ dim }, _function{ function }, _file{ file }, _line{ line }
+    {
+    }
+
+    /** Compare the given object against the stored one.
+     *
+     * @param[in] dim To be compared object.
+     */
+    void operator()(const Dimensions<T> &dim)
+    {
+        ARM_COMPUTE_ERROR_ON_LOC_MSG(have_different_dimensions(_dim, dim, 0), _function, _file, _line,
+                                     "Objects have different dimensions");
+    }
+
+private:
+    const Dimensions<T> &_dim;
+    const char *const    _function;
+    const char *const    _file;
+    const int            _line;
+};
+} // namespace detail
+
+/** Throw an error if one of the pointers is a nullptr.
+ *
+ * @param[in] function Function in which the error occurred.
+ * @param[in] file     Name of the file where the error occurred.
+ * @param[in] line     Line on which the error occurred.
+ * @param[in] pointers Pointers to check against nullptr.
+ */
+template <typename... Ts>
+void error_on_nullptr(const char *function, const char *file, const int line, Ts &&... pointers)
+{
+    auto is_nullptr = [&](const void *ptr)
+    {
+        ARM_COMPUTE_ERROR_ON_LOC(ptr == nullptr, function, file, line);
+    };
+
+    for_each(is_nullptr, std::forward<Ts>(pointers)...);
+}
+#define ARM_COMPUTE_ERROR_ON_NULLPTR(...) ::arm_compute::error_on_nullptr(__func__, __FILE__, __LINE__, __VA_ARGS__)
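+
+/* Usage sketch (illustrative only): a kernel's configure() can reject null
+ * operands up front with the macro defined above; input/output are hypothetical.
+ *
+ *   void configure(const ITensor *input, ITensor *output)
+ *   {
+ *       ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+ *       // ...
+ *   }
+ */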
+
+/** Throw an error if the passed window is invalid.
+ *
+ * The subwindow is invalid if:
+ * - It is not a valid window.
+ * - Its dimensions don't match the full window's ones
+ * - The step for each of its dimensions is not identical to the corresponding one of the full window.
+ *
+ * @param[in] function Function in which the error occurred.
+ * @param[in] file     Name of the file where the error occurred.
+ * @param[in] line     Line on which the error occurred.
+ * @param[in] full     Full size window
+ * @param[in] win      Window to validate.
+ */
+void error_on_mismatching_windows(const char *function, const char *file, const int line,
+                                  const Window &full, const Window &win);
+#define ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(f, w) ::arm_compute::error_on_mismatching_windows(__func__, __FILE__, __LINE__, f, w)
+
+/** Throw an error if the passed subwindow is invalid.
+ *
+ * The subwindow is invalid if:
+ * - It is not a valid window.
+ * - It is not fully contained inside the full window
+ * - The step for each of its dimensions is not identical to the corresponding one of the full window.
+ *
+ * @param[in] function Function in which the error occurred.
+ * @param[in] file     Name of the file where the error occurred.
+ * @param[in] line     Line on which the error occurred.
+ * @param[in] full     Full size window
+ * @param[in] sub      Sub-window to validate.
+ */
+void error_on_invalid_subwindow(const char *function, const char *file, const int line,
+                                const Window &full, const Window &sub);
+#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s) ::arm_compute::error_on_invalid_subwindow(__func__, __FILE__, __LINE__, f, s)
+
+/** Throw an error if the passed coordinates have too many dimensions.
+ *
+ * The coordinates have too many dimensions if any of the dimensions greater than or equal to max_dim is different from 0.
+ *
+ * @param[in] function Function in which the error occurred.
+ * @param[in] file     Name of the file where the error occurred.
+ * @param[in] line     Line on which the error occurred.
+ * @param[in] pos      Coordinates to validate
+ * @param[in] max_dim  Maximum number of dimensions allowed.
+ */
+void error_on_coordinates_dimensions_gte(const char *function, const char *file, const int line,
+                                         const Coordinates &pos, unsigned int max_dim);
+#define ARM_COMPUTE_ERROR_ON_COORDINATES_DIMENSIONS_GTE(p, md) ::arm_compute::error_on_coordinates_dimensions_gte(__func__, __FILE__, __LINE__, p, md)
+
+/** Throw an error if the passed window has too many dimensions.
+ *
+ * The window has too many dimensions if any of the dimensions greater than or equal to max_dim is different from 0.
+ *
+ * @param[in] function Function in which the error occurred.
+ * @param[in] file     Name of the file where the error occurred.
+ * @param[in] line     Line on which the error occurred.
+ * @param[in] win      Window to validate
+ * @param[in] max_dim  Maximum number of dimensions allowed.
+ */
+void error_on_window_dimensions_gte(const char *function, const char *file, const int line,
+                                    const Window &win, unsigned int max_dim);
+#define ARM_COMPUTE_ERROR_ON_WINDOW_DIMENSIONS_GTE(w, md) ::arm_compute::error_on_window_dimensions_gte(__func__, __FILE__, __LINE__, w, md)
+
+/** Throw an error if the passed dimension objects differ.
+ *
+ * @param[in] function Function in which the error occurred.
+ * @param[in] file     Name of the file where the error occurred.
+ * @param[in] line     Line on which the error occurred.
+ * @param[in] dim1     The first object to be compared.
+ * @param[in] dim2     The second object to be compared.
+ * @param[in] dims     (Optional) Further allowed objects.
+ */
+template <typename T, typename... Ts>
+void error_on_mismatching_dimensions(const char *function, const char *file, int line,
+                                     const Dimensions<T> &dim1, const Dimensions<T> &dim2, Ts &&... dims)
+{
+    ARM_COMPUTE_UNUSED(function);
+    ARM_COMPUTE_UNUSED(file);
+    ARM_COMPUTE_UNUSED(line);
+
+    for_each(detail::compare_dimension<T>(dim1, function, file, line), dim2, std::forward<Ts>(dims)...);
+}
+#define ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(...) ::arm_compute::error_on_mismatching_dimensions(__func__, __FILE__, __LINE__, __VA_ARGS__)
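+
+/* Usage sketch (illustrative only): validating that two operands agree before
+ * configuring a kernel; full_window, win, input and output are hypothetical,
+ * and tensor_shape() is assumed to return a Dimensions-derived object.
+ *
+ *   ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(full_window, win);
+ *   ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(input->info()->tensor_shape(),
+ *                                               output->info()->tensor_shape());
+ */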
+
+/** Throw an error if the passed two tensors have different shapes from the given dimension
+ *
+ * @param[in] function Function in which the error occurred.
+ * @param[in] file     Name of the file where the error occurred.
+ * @param[in] line     Line on which the error occurred.
+ * @param[in] tensor_1 The first tensor to be compared.
+ * @param[in] tensor_2 The second tensor to be compared.
+ * @param[in] tensors  (Optional) Further allowed tensors.
+ */
+template <typename... Ts>
+void error_on_mismatching_shapes(const char *function, const char *file, const int line,
+                                 const ITensor *tensor_1, const ITensor *tensor_2, Ts... tensors)
+{
+    error_on_mismatching_shapes(function, file, line, 0U, tensor_1, tensor_2, std::forward<Ts>(tensors)...);
+}
+
+/** Throw an error if the passed two tensors have different shapes from the given dimension
+ *
+ * @param[in] function  Function in which the error occurred.
+ * @param[in] file      Name of the file where the error occurred.
+ * @param[in] line      Line on which the error occurred.
+ * @param[in] upper_dim The dimension from which to check.
+ * @param[in] tensor_1  The first tensor to be compared.
+ * @param[in] tensor_2  The second tensor to be compared.
+ * @param[in] tensors   (Optional) Further allowed tensors.
+ */
+template <typename... Ts>
+void error_on_mismatching_shapes(const char *function, const char *file, const int line,
+                                 unsigned int upper_dim, const ITensor *tensor_1, const ITensor *tensor_2, Ts... tensors)
+{
+    ARM_COMPUTE_UNUSED(function);
+    ARM_COMPUTE_UNUSED(file);
+    ARM_COMPUTE_UNUSED(line);
+
+    const std::array < const ITensor *, 2 + sizeof...(Ts) > tensors_array{ { tensor_1, tensor_2, std::forward<Ts>(tensors)... } };
+    ARM_COMPUTE_UNUSED(tensors_array);
+
+    ARM_COMPUTE_ERROR_ON_LOC(*tensors_array.cbegin() == nullptr, function, file, line);
+
+    ARM_COMPUTE_ERROR_ON_LOC_MSG(std::any_of(std::next(tensors_array.cbegin()), tensors_array.cend(), [&](const ITensor * tensor)
+    {
+        ARM_COMPUTE_ERROR_ON_LOC(tensor == nullptr, function, file, line);
+        return detail::have_different_dimensions((*tensors_array.cbegin())->info()->tensor_shape(), tensor->info()->tensor_shape(), upper_dim);
+    }),
+    function, file, line, "Tensors have different shapes");
+}
+#define ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(...) ::arm_compute::error_on_mismatching_shapes(__func__, __FILE__, __LINE__, __VA_ARGS__)
+
+/** Throw an error if the passed two tensors have different data types
+ *
+ * @param[in] function Function in which the error occurred.
+ * @param[in] file     Name of the file where the error occurred.
+ * @param[in] line     Line on which the error occurred.
+ * @param[in] tensor_1 The first tensor to be compared.
+ * @param[in] tensor_2 The second tensor to be compared.
+ * @param[in] tensors  (Optional) Further allowed tensors.
+ */
+template <typename... Ts>
+void error_on_mismatching_data_types(const char *function, const char *file, const int line,
+                                     const ITensor *tensor_1, const ITensor *tensor_2, Ts... tensors)
+{
+    ARM_COMPUTE_UNUSED(function);
+    ARM_COMPUTE_UNUSED(file);
+    ARM_COMPUTE_UNUSED(line);
+    ARM_COMPUTE_UNUSED(tensor_1);
+    ARM_COMPUTE_UNUSED(tensor_2);
+
+    DataType &&first_data_type = tensor_1->info()->data_type();
+    ARM_COMPUTE_UNUSED(first_data_type);
+
+    const std::array<const ITensor *, sizeof...(Ts)> tensors_array{ { std::forward<Ts>(tensors)... } };
+    ARM_COMPUTE_UNUSED(tensors_array);
+
+    ARM_COMPUTE_ERROR_ON_LOC_MSG(tensor_2->info()->data_type() != first_data_type || std::any_of(tensors_array.begin(), tensors_array.end(), [&](const ITensor * tensor)
+    {
+        return tensor->info()->data_type() != first_data_type;
+    }),
+    function, file, line, "Tensors have different data types");
+}
+
+#define ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(...) ::arm_compute::error_on_mismatching_data_types(__func__, __FILE__, __LINE__, __VA_ARGS__)
+
+/** Throw an error if the passed tensors have different fixed point data types or different fixed point positions
+ *
+ * @note If the first tensor doesn't have fixed point data type, the function returns without throwing an error
+ *
+ * @param[in] function Function in which the error occurred.
+ * @param[in] file     Name of the file where the error occurred.
+ * @param[in] line     Line on which the error occurred.
+ * @param[in] tensor_1 The first tensor to be compared.
+ * @param[in] tensor_2 The second tensor to be compared.
+ * @param[in] tensors  (Optional) Further allowed tensors.
+ */
+template <typename... Ts>
+void error_on_mismatching_fixed_point(const char *function, const char *file, const int line,
+                                      const ITensor *tensor_1, const ITensor *tensor_2, Ts... tensors)
+{
+    ARM_COMPUTE_UNUSED(function);
+    ARM_COMPUTE_UNUSED(file);
+    ARM_COMPUTE_UNUSED(line);
+    ARM_COMPUTE_UNUSED(tensor_1);
+    ARM_COMPUTE_UNUSED(tensor_2);
+
+    DataType &&first_data_type            = tensor_1->info()->data_type();
+    const int  first_fixed_point_position = tensor_1->info()->fixed_point_position();
+    ARM_COMPUTE_UNUSED(first_data_type);
+    ARM_COMPUTE_UNUSED(first_fixed_point_position);
+
+    if((first_data_type != DataType::QS8) && (first_data_type != DataType::QS16))
+    {
+        return;
+    }
+
+    const std::array < const ITensor *, 1 + sizeof...(Ts) > tensors_array{ { tensor_2, std::forward<Ts>(tensors)... } };
+    ARM_COMPUTE_UNUSED(tensors_array);
+
+    ARM_COMPUTE_ERROR_ON_LOC_MSG(std::any_of(tensors_array.begin(), tensors_array.end(), [&](const ITensor * tensor)
+    {
+        return tensor->info()->data_type() != first_data_type;
+    }),
+    function, file, line, "Tensors have different fixed point data types");
+
+    ARM_COMPUTE_ERROR_ON_LOC_MSG(std::any_of(tensors_array.begin(), tensors_array.end(), [&](const ITensor * tensor)
+    {
+        return tensor->info()->fixed_point_position() != first_fixed_point_position;
+    }),
+    function, file, line, "Tensors have different fixed point positions");
+}
+
+#define ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(...) ::arm_compute::error_on_mismatching_fixed_point(__func__, __FILE__, __LINE__, __VA_ARGS__)
+
+/** Throw an error if the format of the passed tensor/multi-image does not match any of the formats provided.
+ *
+ * @param[in] function Function in which the error occurred.
+ * @param[in] file     Name of the file where the error occurred.
+ * @param[in] line     Line on which the error occurred.
+ * @param[in] object   Tensor/multi-image to validate.
+ * @param[in] format   First format allowed.
+ * @param[in] formats  (Optional) Further allowed formats.
+ */
+template <typename T, typename F, typename... Fs>
+void error_on_format_not_in(const char *function, const char *file, const int line,
+                            const T *object, F &&format, Fs &&... formats)
+{
+    ARM_COMPUTE_ERROR_ON_LOC(object == nullptr, function, file, line);
+
+    Format &&object_format = object->info()->format();
+    ARM_COMPUTE_UNUSED(object_format);
+
+    ARM_COMPUTE_ERROR_ON_LOC(object_format == Format::UNKNOWN, function, file, line);
+
+    const std::array<F, sizeof...(Fs)> formats_array{ { std::forward<Fs>(formats)... } };
+    ARM_COMPUTE_UNUSED(formats_array);
+
+    ARM_COMPUTE_ERROR_ON_LOC_MSG(object_format != format && std::none_of(formats_array.begin(), formats_array.end(), [&](const F & f)
+    {
+        return f == object_format;
+    }),
+    function, file, line, "Format %s not supported by this kernel", string_from_format(object_format).c_str());
+}
+#define ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(t, ...) ::arm_compute::error_on_format_not_in(__func__, __FILE__, __LINE__, t, __VA_ARGS__)
+
+/** Throw an error if the data type of the passed tensor does not match any of the data types provided.
+ *
+ * @param[in] function Function in which the error occurred.
+ * @param[in] file     Name of the file where the error occurred.
+ * @param[in] line     Line on which the error occurred.
+ * @param[in] tensor   Tensor to validate.
+ * @param[in] dt       First data type allowed.
+ * @param[in] dts      (Optional) Further allowed data types.
+ */
+template <typename T, typename... Ts>
+void error_on_data_type_not_in(const char *function, const char *file, const int line,
+                               const ITensor *tensor, T &&dt, Ts &&... dts)
+{
+    ARM_COMPUTE_ERROR_ON_LOC(tensor == nullptr, function, file, line);
+
+    const DataType &tensor_dt = tensor->info()->data_type(); //NOLINT
+    ARM_COMPUTE_UNUSED(tensor_dt);
+
+    ARM_COMPUTE_ERROR_ON_LOC(tensor_dt == DataType::UNKNOWN, function, file, line);
+
+    const std::array<T, sizeof...(Ts)> dts_array{ { std::forward<Ts>(dts)... } };
+    ARM_COMPUTE_UNUSED(dts_array);
+
+    ARM_COMPUTE_ERROR_ON_LOC_MSG(tensor_dt != dt && std::none_of(dts_array.begin(), dts_array.end(), [&](const T & d)
+    {
+        return d == tensor_dt;
+    }),
+    function, file, line, "ITensor data type %s not supported by this kernel", string_from_data_type(tensor_dt).c_str());
+}
+#define ARM_COMPUTE_ERROR_ON_DATA_TYPE_NOT_IN(t, ...) ::arm_compute::error_on_data_type_not_in(__func__, __FILE__, __LINE__, t, __VA_ARGS__)
+
+/** Throw an error if the data type or the number of channels of the passed tensor does not match any of the data types and number of channels provided.
+ *
+ * @param[in] function     Function in which the error occurred.
+ * @param[in] file         Name of the file where the error occurred.
+ * @param[in] line         Line on which the error occurred.
+ * @param[in] tensor       Tensor to validate.
+ * @param[in] num_channels Number of channels to check
+ * @param[in] dt           First data type allowed.
+ * @param[in] dts          (Optional) Further allowed data types.
+ */
+template <typename T, typename... Ts>
+void error_on_data_type_channel_not_in(const char *function, const char *file, const int line,
+                                       const ITensor *tensor, size_t num_channels, T &&dt, Ts &&... dts)
+{
+    error_on_data_type_not_in(function, file, line, tensor, std::forward<T>(dt), std::forward<Ts>(dts)...);
+
+    const size_t tensor_nc = tensor->info()->num_channels();
+    ARM_COMPUTE_UNUSED(tensor_nc);
+
+    ARM_COMPUTE_ERROR_ON_LOC_MSG(tensor_nc != num_channels, function, file, line, "Number of channels %d. Required number of channels %d", tensor_nc, num_channels);
+}
+#define ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c, ...) ::arm_compute::error_on_data_type_channel_not_in(__func__, __FILE__, __LINE__, t, c, __VA_ARGS__)
+
+/** Throw an error if the tensor is not 2D.
+ *
+ * @param[in] function Function in which the error occurred.
+ * @param[in] file     Name of the file where the error occurred.
+ * @param[in] line     Line on which the error occurred.
+ * @param[in] tensor   Tensor to validate.
+ */
+void error_on_tensor_not_2d(const char *function, const char *file, const int line,
+                            const ITensor *tensor);
+#define ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(t) ::arm_compute::error_on_tensor_not_2d(__func__, __FILE__, __LINE__, t)
+
+/** Throw an error if the channel is not in channels.
+ *
+ * @param[in] function Function in which the error occurred.
+ * @param[in] file     Name of the file where the error occurred.
+ * @param[in] line     Line on which the error occurred.
+ * @param[in] cn       Input channel
+ * @param[in] channel  First channel allowed.
+ * @param[in] channels (Optional) Further allowed channels.
+ */
+template <typename T, typename... Ts>
+void error_on_channel_not_in(const char *function, const char *file, const int line,
+                             T cn, T &&channel, Ts &&... channels)
+{
+    ARM_COMPUTE_ERROR_ON_LOC(cn == Channel::UNKNOWN, function, file, line);
+
+    const std::array<T, sizeof...(Ts)> channels_array{ { std::forward<Ts>(channels)... } };
+    ARM_COMPUTE_UNUSED(channels_array);
+    ARM_COMPUTE_ERROR_ON_LOC(channel != cn && std::none_of(channels_array.begin(), channels_array.end(), [&](const T & f)
+    {
+        return f == cn;
+    }),
+    function, file, line);
+}
+#define ARM_COMPUTE_ERROR_ON_CHANNEL_NOT_IN(c, ...) ::arm_compute::error_on_channel_not_in(__func__, __FILE__, __LINE__, c, __VA_ARGS__)
+
+/** Throw an error if the channel is not in the given format.
+ *
+ * @param[in] function Function in which the error occurred.
+ * @param[in] file     Name of the file where the error occurred.
+ * @param[in] line     Line on which the error occurred.
+ * @param[in] fmt      Input format
+ * @param[in] cn       Channel to validate.
+ */
+void error_on_channel_not_in_known_format(const char *function, const char *file, const int line,
+                                          Format fmt, Channel cn);
+#define ARM_COMPUTE_ERROR_ON_CHANNEL_NOT_IN_KNOWN_FORMAT(f, c) ::arm_compute::error_on_channel_not_in_known_format(__func__, __FILE__, __LINE__, f, c)
+
+/** Throw an error if the @ref IMultiHOG container is invalid
+ *
+ * An @ref IMultiHOG container is invalid if:
+ *
+ * -# it is a nullptr
+ * -# it doesn't contain models
+ * -# it doesn't have the HOG data objects with the same phase_type, normalization_type and l2_hyst_threshold (if normalization_type == L2HYS_NORM)
+ *
+ * @param[in] function  Function in which the error occurred.
+ * @param[in] file      Name of the file where the error occurred.
+ * @param[in] line      Line on which the error occurred.
+ * @param[in] multi_hog IMultiHOG container to validate
+ */
+void error_on_invalid_multi_hog(const char *function, const char *file, const int line,
+                                const IMultiHOG *multi_hog);
+#define ARM_COMPUTE_ERROR_ON_INVALID_MULTI_HOG(m) ::arm_compute::error_on_invalid_multi_hog(__func__, __FILE__, __LINE__, m)
+
+/** Throw an error if the kernel is not configured.
+ *
+ * @param[in] function Function in which the error occurred.
+ * @param[in] file     Name of the file where the error occurred.
+ * @param[in] line     Line on which the error occurred.
+ * @param[in] kernel   Kernel to validate.
+ */
+void error_on_unconfigured_kernel(const char *function, const char *file, const int line,
+                                  const IKernel *kernel);
+#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k) ::arm_compute::error_on_unconfigured_kernel(__func__, __FILE__, __LINE__, k)
+
+/** Throw an error if the coordinates and shape of the subtensor are not within the parent tensor.
+ *
+ * @param[in] function     Function in which the error occurred.
+ * @param[in] file         Name of the file where the error occurred.
+ * @param[in] line         Line on which the error occurred.
+ * @param[in] parent_shape Parent tensor shape
+ * @param[in] coords       Coordinates inside the parent tensor where the first element of the subtensor is
+ * @param[in] shape        Shape of the subtensor
+ */
+void error_on_invalid_subtensor(const char *function, const char *file, const int line,
+                                const TensorShape &parent_shape, const Coordinates &coords, const TensorShape &shape);
+#define ARM_COMPUTE_ERROR_ON_INVALID_SUBTENSOR(p, c, s) ::arm_compute::error_on_invalid_subtensor(__func__, __FILE__, __LINE__, p, c, s)
+
+/** Throw an error if the valid region of a subtensor is not inside the valid region of the parent tensor.
+ *
+ * @param[in] function            Function in which the error occurred.
+ * @param[in] file                Name of the file where the error occurred.
+ * @param[in] line                Line on which the error occurred.
+ * @param[in] parent_valid_region Parent valid region.
+ * @param[in] valid_region        Valid region of subtensor.
+ */
+void error_on_invalid_subtensor_valid_region(const char *function, const char *file, const int line,
+                                             const ValidRegion &parent_valid_region, const ValidRegion &valid_region);
+#define ARM_COMPUTE_ERROR_ON_INVALID_SUBTENSOR_VALID_REGION(pv, sv) ::arm_compute::error_on_invalid_subtensor_valid_region(__func__, __FILE__, __LINE__, pv, sv)
+
+/** Throw an error if the input fixed-point positions are different.
+ *
+ * @param[in] function Function in which the error occurred.
+ * @param[in] file     Name of the file where the error occurred.
+ * @param[in] line     Line on which the error occurred.
+ * @param[in] tensor_1 The first tensor to be compared.
+ * @param[in] tensor_2 The second tensor to be compared.
+ * @param[in] tensors  (Optional) Further allowed tensors.
+ */
+template <typename... Ts>
+void error_on_mismatching_fixed_point_position(const char *function, const char *file, const int line,
+                                               const ITensor *tensor_1, const ITensor *tensor_2, Ts... tensors)
+{
+    const std::array < const ITensor *, 1 + sizeof...(Ts) > tensors_array{ { tensor_2, std::forward<Ts>(tensors)... } };
+    ARM_COMPUTE_UNUSED(tensors_array);
+
+    ARM_COMPUTE_ERROR_ON_LOC_MSG(std::any_of(tensors_array.begin(), tensors_array.end(), [&](const ITensor * tensor)
+    {
+        return tensor->info()->fixed_point_position() != tensor_1->info()->fixed_point_position();
+    }),
+    function, file, line, "Tensors have different fixed-point positions");
+}
+#define ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(...) ::arm_compute::error_on_mismatching_fixed_point_position(__func__, __FILE__, __LINE__, __VA_ARGS__)
+
+/** Throw an error if the fixed-point value is not representable in the specified Q format.
+ *
+ * @param[in] function Function in which the error occurred.
+ * @param[in] file     Name of the file where the error occurred.
+ * @param[in] line     Line on which the error occurred.
+ * @param[in] value    The floating point value to be checked.
+ * @param[in] tensor   Input tensor that has information on data type and fixed-point position.
+ */
+template <typename... Ts>
+void error_on_value_not_representable_in_fixed_point(const char *function, const char *file, int line,
+                                                     float value, const ITensor *tensor)
+{
+    const int          fixed_point_position = tensor->info()->fixed_point_position();
+    const DataType     dt                   = tensor->info()->data_type();
+    const unsigned int q_max_range          = 0xFFFFFFFFu >> (((sizeof(unsigned int) - element_size_from_data_type(dt)) * 8) + 1);
+    const float        max_range            = q_max_range / (static_cast<float>(1 << fixed_point_position));
+    ARM_COMPUTE_UNUSED(max_range);
+
+    ARM_COMPUTE_ERROR_ON_LOC_MSG(value > max_range, function, file, line,
+                                 "Value %f is not representable in %s with fixed-point position %d", value, string_from_data_type(dt).c_str(), fixed_point_position);
+}
+#define ARM_COMPUTE_ERROR_ON_VALUE_NOT_REPRESENTABLE_IN_FIXED_POINT(...) ::arm_compute::error_on_value_not_representable_in_fixed_point(__func__, __FILE__, __LINE__, __VA_ARGS__)
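+
+/* Worked example (illustrative only, assuming a 32-bit unsigned int): for a QS8
+ * tensor (1 byte per element) the magnitude bound is 0xFFFFFFFF >> 25 = 127, so
+ * with fixed_point_position == 4 the limit is max_range = 127 / 2^4 = 7.9375 and
+ * a value of 8.0f would trigger the error:
+ *
+ *   ARM_COMPUTE_ERROR_ON_VALUE_NOT_REPRESENTABLE_IN_FIXED_POINT(8.0f, tensor); // tensor: hypothetical QS8 ITensor*
+ */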
+}
+#endif /* __ARM_COMPUTE_VALIDATE_H__ */
diff --git a/arm_compute/core/Window.h b/arm_compute/core/Window.h
new file mode 100644
index 0000000000..6e7ef22531
--- /dev/null
+++ b/arm_compute/core/Window.h
@@ -0,0 +1,355 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_WINDOW_H__
+#define __ARM_COMPUTE_WINDOW_H__
+
+#include <algorithm>
+#include <array>
+#include <cstddef>
+
+#include "arm_compute/core/Coordinates.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/ITensorInfo.h"
+#include "arm_compute/core/Utils.h"
+
+namespace arm_compute
+{
+/** Describe a multidimensional execution window. */
+class Window
+{
+public:
+    /** Alias for dimension 0 also known as X dimension */
+    static constexpr size_t DimX = 0;
+    /** Alias for dimension 1 also known as Y dimension */
+    static constexpr size_t DimY = 1;
+    /** Alias for dimension 2 also known as Z dimension */
+    static constexpr size_t DimZ = 2;
+
+    /** Default constructor: create a window containing a single element. */
+    constexpr Window()
+        : _dims(), _thread_id(0), _num_threads(1)
+    {
+    }
+    /** Copy constructor
+     *
+     * @param[in] src Copy the values from src to a new object
+     */
+    Window(const Window &src);
+
+    /** Describe one of the image's dimensions with a start, end and step.
+     *
+     * Iteration through the elements of the dimension is done like this:
+     * @code
+     * for(int v = start(); v < end(); v += step())
+     * {
+     *     ...
+     * }
+     * @endcode
+     */
+    class Dimension
+    {
+    public:
+        /** Constructor, by default creates a dimension of 1.
+         *
+         * @param[in] start Start of the dimension
+         * @param[in] end   End of the dimension
+         * @param[in] step  Step between two elements of the dimension when iterating.
+         */
+        constexpr Dimension(int start = 0, int end = 1, int step = 1)
+            : _start(start), _end(end), _step(step)
+        {
+        }
+        /** Default assignment operator to allow dimensions to be copied */
+        Dimension &operator=(const Dimension &d) = default;
+        /** Return the start of the dimension */
+        constexpr int start() const
+        {
+            return _start;
+        }
+        /** Return the end of the dimension */
+        constexpr int end() const
+        {
+            return _end;
+        }
+        /** Return the step of the dimension */
+        constexpr int step() const
+        {
+            return _step;
+        }
+        /** Set the dimension's step
+         *
+         * @param[in] step The new step
+         */
+        void set_step(int step)
+        {
+            _step = step;
+        }
+
+    private:
+        int _start; /**< Start of the dimension */
+        int _end;   /**< End of the dimension */
+        int _step;  /**< Step between two elements of the dimension when iterating */
+    };
+
+    /** Read only access to a given dimension of the window
+     *
+     * @note Precondition: dimension < Coordinates::num_max_dimensions
+     *
+     * @param[in] dimension The dimension to access
+     *
+     * @return The requested dimension
+     */
+    constexpr const Dimension &operator[](size_t dimension) const;
+
+    /** Alias to access the first dimension of the window
+     *
+     * @return First dimension of the window
+     */
+    constexpr const Dimension &x() const
+    {
+        return _dims.at(Window::DimX);
+    }
+
+    /** Alias to access the second dimension of the window
+     *
+     * @return Second dimension of the window
+     */
+    constexpr const Dimension &y() const
+    {
+        return _dims.at(Window::DimY);
+    }
+
+    /** Alias to access the third dimension of the window
+     *
+     * @return Third dimension of the window
+     */
+    constexpr const Dimension &z() const
+    {
+        return _dims.at(Window::DimZ);
+    }
+
+    /** Set the values of a given dimension
+     *
+     * @param[in] dimension The dimension to set
+     * @param[in] dim       The values to set the dimension to
+     */
+    void set(size_t dimension, const Dimension &dim);
+
+    /** Use the tensor's dimensions to fill the window dimensions.
+     *
+     * @param[in] info            Tensor information to copy the dimensions from.
+     * @param[in] first_dimension Only copy dimensions which are greater or equal to this value.
+     */
+    void use_tensor_dimensions(const ITensorInfo *info, size_t first_dimension = Window::DimX);
+
+    /** Shift the values of a given dimension by the given shift_value
+     *
+     * @param[in] dimension   The dimension to shift
+     * @param[in] shift_value Value to shift the start and end values of.
+     */
+    void shift(size_t dimension, int shift_value);
+
+    /** Adjust the start or end of a given dimension by the given value
+     *
+     * @param[in] dimension    The dimension to adjust
+     * @param[in] adjust_value The adjusted value.
+     * @param[in] is_at_start  Flag indicating whether to adjust the start or the end of the dimension.
+     */
+    void adjust(size_t dimension, int adjust_value, bool is_at_start);
+
+    /** Scale the values of a given dimension by the given scale_value
+     *
+     * @note The end of the window is rounded up to be a multiple of step after the scaling.
+     *
+     * @param[in] dimension   The dimension to scale
+     * @param[in] scale_value Value to scale the start, end and step values of.
+     */
+    void scale(size_t dimension, float scale_value);
+ * + * @param[in] dimension Dimension to update + * @param[in] step The new dimension's step value + */ + void set_dimension_step(size_t dimension, int step); + + /** Will validate all the window's dimensions' values when asserts are enabled + * + * No-op when asserts are disabled + */ + void validate() const; + + /** Return the number of iterations needed to iterate through a given dimension + * + * @param[in] dimension The requested dimension + * + * @return The number of iterations + */ + constexpr size_t num_iterations(size_t dimension) const; + + /** Split a window into a set of sub windows along a given dimension + * + * For example to split a window into 3 sub-windows along the Y axis, you would have to do:
+ * Window sub0 = window.split_window(1, 0, 3);
+ * Window sub1 = window.split_window(1, 1, 3);
+ * Window sub2 = window.split_window(1, 2, 3);
+ *
+ * @param[in] dimension Dimension along which the split will be performed
+ * @param[in] id        Id of the sub-window to return. Must be in the range [0, total - 1]
+ * @param[in] total     Total number of sub-windows the window will be split into.
+ *
+ * @return The subwindow "id" out of "total"
+ */
+    Window split_window(size_t dimension, size_t id, size_t total) const;
+    /** First 1D slice of the window
+     *
+     * @return The first slice of the window.
+     */
+    Window first_slice_window_1D() const
+    {
+        return first_slice_window<1>();
+    }
+    /** First 2D slice of the window
+     *
+     * @return The first slice of the window.
+     */
+    Window first_slice_window_2D() const
+    {
+        return first_slice_window<2>();
+    }
+    /** First 3D slice of the window
+     *
+     * @return The first slice of the window.
+     */
+    Window first_slice_window_3D() const
+    {
+        return first_slice_window<3>();
+    }
+    /** Slide the passed 1D window slice.
+     *
+     * If slice contains the last slice then it will remain unchanged and false will be returned.
+     *
+     * @param[in,out] slice Current slice, to be updated to the next slice.
+     *
+     * @return true if slice contains a new slice, false if slice already contained the last slice
+     */
+    bool slide_window_slice_1D(Window &slice) const
+    {
+        return slide_window_slice<1>(slice);
+    }
+    /** Slide the passed 2D window slice.
+     *
+     * If slice contains the last slice then it will remain unchanged and false will be returned.
+     *
+     * @param[in,out] slice Current slice, to be updated to the next slice.
+     *
+     * @return true if slice contains a new slice, false if slice already contained the last slice
+     */
+    bool slide_window_slice_2D(Window &slice) const
+    {
+        return slide_window_slice<2>(slice);
+    }
+    /** Slide the passed 3D window slice.
+     *
+     * If slice contains the last slice then it will remain unchanged and false will be returned.
+     *
+     * @param[in,out] slice Current slice, to be updated to the next slice.
+     *
+     * @return true if slice contains a new slice, false if slice already contained the last slice
+     */
+    bool slide_window_slice_3D(Window &slice) const
+    {
+        return slide_window_slice<3>(slice);
+    }
+    /** Slide the passed 4D window slice.
+     *
+     * If slice contains the last slice then it will remain unchanged and false will be returned.
+     *
+     * @param[in,out] slice Current slice, to be updated to the next slice.
+     *
+     * @return true if slice contains a new slice, false if slice already contained the last slice
+     */
+    bool slide_window_slice_4D(Window &slice) const
+    {
+        return slide_window_slice<4>(slice);
+    }
+    /** Sets the ID of the thread that the window is associated with.
+     *
+     * @param id ID of the thread that the window is associated with.
+     */
+    void set_thread_id(unsigned int id)
+    {
+        _thread_id = id;
+    }
+    /** Sets the number of threads that the window is dispatched across.
+     *
+     * @param num_threads The number of threads the window is dispatched across.
+     */
+    void set_num_threads(unsigned int num_threads)
+    {
+        _num_threads = num_threads;
+    }
+    /** Get the ID of the thread that the window is associated with.
+     *
+     * @return ID of the thread that the window is associated with.
+     */
+    constexpr unsigned int thread_id() const
+    {
+        return _thread_id;
+    }
+    /** Get the number of threads that the window is dispatched across.
+     *
+     * @return The number of threads the window is dispatched across.
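The slice helpers above are meant to be used as a pair: take the first N-dimensional slice, process it, then slide until the window is exhausted. A minimal sketch of the intended loop (window extents are placeholders):

    Window win;
    win.set(Window::DimX, Window::Dimension(0, 16));
    win.set(Window::DimY, Window::Dimension(0, 8));
    win.set(Window::DimZ, Window::Dimension(0, 4));

    // Visit each z plane as an independent 2D slice (4 slices in total):
    Window slice = win.first_slice_window_2D();
    do
    {
        // x and y keep their full range; z is collapsed to a single iteration per slice
    }
    while(win.slide_window_slice_2D(slice));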
+     */
+    constexpr unsigned int num_threads() const
+    {
+        return _num_threads;
+    }
+
+private:
+    /** First slice of the window
+     *
+     * @return The first slice of the window.
+     */
+    template <unsigned int window_dimension>
+    Window first_slice_window() const;
+
+    /** Slide the passed window slice.
+     *
+     * If slice contains the last slice then it will remain unchanged and false will be returned.
+     *
+     * @param[in,out] slice Current slice, to be updated to the next slice.
+     *
+     * @return true if slice contains a new slice, false if slice already contained the last slice
+     */
+    template <unsigned int window_dimension>
+    bool slide_window_slice(Window &slice) const;
+
+private:
+    std::array<Window::Dimension, Coordinates::num_max_dimensions> _dims;
+    unsigned int _thread_id;
+    unsigned int _num_threads;
+};
+}
+#include "Window.inl"
+#endif /*__ARM_COMPUTE_WINDOW_H__ */
diff --git a/arm_compute/core/Window.inl b/arm_compute/core/Window.inl
new file mode 100644
index 0000000000..75428a145b
--- /dev/null
+++ b/arm_compute/core/Window.inl
@@ -0,0 +1,182 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+namespace arm_compute
+{
+inline Window::Window(const Window &src)
+    : _dims(), _thread_id(src._thread_id), _num_threads(src._num_threads)
+{
+    for(size_t i = 0; i < Coordinates::num_max_dimensions; ++i)
+    {
+        set(i, src[i]);
+    }
+}
+
+inline constexpr const Window::Dimension &Window::operator[](const size_t dimension) const
+{
+    // Precondition: dimension < Coordinates::num_max_dimensions
+    return _dims.at(dimension);
+}
+inline void Window::set(const size_t dimension, const Window::Dimension &dim)
+{
+    ARM_COMPUTE_ERROR_ON(dimension >= Coordinates::num_max_dimensions);
+    _dims[dimension] = dim;
+}
+
+inline void Window::shift(const size_t dimension, const int shift_value)
+{
+    ARM_COMPUTE_ERROR_ON(dimension >= Coordinates::num_max_dimensions);
+    Window::Dimension &d = _dims[dimension];
+    d = Window::Dimension(d.start() + shift_value, d.end() + shift_value, d.step());
+}
+
+inline void Window::adjust(size_t dimension, int adjust_value, bool is_at_start)
+{
+    ARM_COMPUTE_ERROR_ON(dimension >= Coordinates::num_max_dimensions);
+    Window::Dimension &d = _dims[dimension];
+
+    if(is_at_start)
+    {
+        d = Window::Dimension(d.start() + adjust_value, d.end(), d.step());
+    }
+    else
+    {
+        d = Window::Dimension(d.start(), d.end() + adjust_value, d.step());
+    }
+}
+
+inline void Window::scale(const size_t dimension, float scale_value)
+{
+    ARM_COMPUTE_ERROR_ON(dimension >= Coordinates::num_max_dimensions);
+    Window::Dimension &d = _dims[dimension];
+    const int scaled_step = d.step() * scale_value;
+    const int scaled_end = ceil_to_multiple(d.end() * scale_value, scaled_step);
+    d = Window::Dimension(d.start() * scale_value, scaled_end, scaled_step);
+}
+
+inline void Window::set_dimension_step(const size_t dimension, const int step)
+{
+    ARM_COMPUTE_ERROR_ON(dimension >= Coordinates::num_max_dimensions);
+    _dims[dimension].set_step(step);
+}
+
+inline void Window::validate() const
+{
+    for(size_t i = 0; i < Coordinates::num_max_dimensions; ++i)
+    {
+        ARM_COMPUTE_ERROR_ON(_dims[i].step() == 0);
+        ARM_COMPUTE_ERROR_ON(_dims[i].end() <= _dims[i].start());
+        ARM_COMPUTE_ERROR_ON((_dims[i].end() - _dims[i].start()) % _dims[i].step());
+    }
+}
+
+inline constexpr size_t Window::num_iterations(size_t dimension) const
+{
+    // Precondition: dimension < Coordinates::num_max_dimensions
+    // Precondition: (end - start) % step == 0
+    return (_dims.at(dimension).end() - _dims.at(dimension).start()) / _dims.at(dimension).step();
+}
+
+inline Window Window::split_window(const size_t dimension, const size_t id, const size_t total) const
+{
+    ARM_COMPUTE_ERROR_ON(id >= total);
+    ARM_COMPUTE_ERROR_ON(dimension >= Coordinates::num_max_dimensions);
+
+    Window out;
+
+    for(size_t d = 0; d < Coordinates::num_max_dimensions; ++d)
+    {
+        if(d == dimension)
+        {
+            int start          = _dims[d].start();
+            int end            = _dims[d].end();
+            int per_sub_window = (num_iterations(d) / total) * _dims[d].step();
+
+            start += id * per_sub_window;
+
+            if(id != total - 1)
+            {
+                end = start + per_sub_window;
+            }
+
+            out.set(d, Dimension(start, end, _dims[d].step()));
+        }
+        else
+        {
+            out.set(d, _dims[d]);
+        }
+    }
+
+    return out;
+}
+
+template <unsigned int window_dimension>
+inline bool Window::slide_window_slice(Window &slice) const
+{
+    for(unsigned int n = window_dimension; n < Coordinates::num_max_dimensions; ++n)
+    {
+        // Did we reach the end of this dimension?
+        const int v = slice._dims[n].start() + 1;
+
+        if(v < _dims[n].end())
+        {
+            // No: increment
+            slice._dims[n] = Dimension(v, v + 1, 1);
+
+            // Reset lower dimensions:
+            for(unsigned int lower = window_dimension; lower < n; ++lower)
+            {
+                slice._dims[lower] = Dimension(_dims[lower].start(), _dims[lower].start() + 1, 1);
+            }
+            return true;
+        }
+    }
+
+    // It was the last slice
+    return false; // Iteration over
}
+
+template <unsigned int window_dimension>
+inline Window Window::first_slice_window() const
+{
+    Window slice;
+
+    std::copy_n(_dims.begin(), window_dimension, slice._dims.begin());
+
+    // Initialise higher dimensions to be the first slice.
+    for(unsigned int n = window_dimension; n < Coordinates::num_max_dimensions; ++n)
+    {
+        slice._dims[n] = Dimension(_dims[n].start(), _dims[n].start() + 1, 1);
+    }
+
+    return slice;
+}
+
+inline void Window::use_tensor_dimensions(const ITensorInfo *info, const size_t first_dimension)
+{
+    for(unsigned int n = first_dimension; n < info->num_dimensions(); ++n)
+    {
+        set(n, Window::Dimension(0, std::max(info->dimension(n), static_cast<size_t>(1))));
+    }
+}
+}
diff --git a/arm_compute/runtime/Array.h b/arm_compute/runtime/Array.h
new file mode 100644
index 0000000000..c8a240e428
--- /dev/null
+++ b/arm_compute/runtime/Array.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
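Since num_iterations() is (end - start) / step, split_window() hands out whole iterations and the last sub-window absorbs any remainder. A small worked sketch, splitting a 12-iteration dimension across 3 workers:

    Window win;
    win.set(Window::DimY, Window::Dimension(0, 12, 1)); // num_iterations(DimY) == 12

    for(size_t id = 0; id < 3; ++id)
    {
        Window sub = win.split_window(Window::DimY, id, 3);
        // sub covers y in [4 * id, 4 * id + 4): 12 / 3 = 4 iterations per worker
    }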
+ */
+#ifndef __ARM_COMPUTE_ARRAY_H__
+#define __ARM_COMPUTE_ARRAY_H__
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/IArray.h"
+#include "arm_compute/core/Types.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+/** Basic implementation of the IArray interface which allocates a static number of T values */
+template <class T>
+class Array : public IArray<T>
+{
+public:
+    /** Default constructor: empty array */
+    Array()
+        : IArray<T>(0), _values(nullptr)
+    {
+    }
+    /** Constructor: initializes an array which can contain up to max_num_values values
+     *
+     * @param[in] max_num_values Maximum number of values the array will be able to store
+     */
+    Array(size_t max_num_values)
+        : IArray<T>(max_num_values), _values(arm_compute::cpp14::make_unique<T[]>(max_num_values))
+    {
+    }
+
+    // Inherited methods overridden:
+    T *buffer() const override
+    {
+        return _values.get();
+    }
+
+private:
+    std::unique_ptr<T[]> _values;
+};
+
+using KeyPointArray        = Array<KeyPoint>;
+using Coordinates2DArray   = Array<Coordinates2D>;
+using DetectionWindowArray = Array<DetectionWindow>;
+using Size2DArray          = Array<Size2D>;
+using UInt8Array           = Array<uint8_t>;
+using UInt16Array          = Array<uint16_t>;
+using UInt32Array          = Array<uint32_t>;
+using Int16Array           = Array<int16_t>;
+using Int32Array           = Array<int32_t>;
+using FloatArray           = Array<float>;
+}
+#endif /* __ARM_COMPUTE_ARRAY_H__ */
diff --git a/arm_compute/runtime/CL/CLArray.h b/arm_compute/runtime/CL/CLArray.h
new file mode 100644
index 0000000000..f4c2ef06d9
--- /dev/null
+++ b/arm_compute/runtime/CL/CLArray.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
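A minimal usage sketch for this container; push_back(), num_values() and at() are assumed from the IArray base class, which is not part of this hunk:

    #include "arm_compute/runtime/Array.h"

    using namespace arm_compute;

    KeyPointArray points(100); // room for up to 100 KeyPoint values

    KeyPoint kp{};
    kp.x = 16;
    kp.y = 32;
    points.push_back(kp);

    for(size_t i = 0; i < points.num_values(); ++i)
    {
        const KeyPoint &p = points.at(i);
        // ... consume p ...
    }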
+ */
+#ifndef __ARM_COMPUTE_CLARRAY_H__
+#define __ARM_COMPUTE_CLARRAY_H__
+
+#include "arm_compute/core/CL/ICLArray.h"
+#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+
+namespace arm_compute
+{
+/** CLArray implementation */
+template <class T>
+class CLArray : public ICLArray<T>
+{
+public:
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLArray(const CLArray &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    const CLArray &operator=(const CLArray &) = delete;
+    /** Constructor: initializes an array which can contain up to max_num_values values
+     *
+     * @param[in] max_num_values Maximum number of values the array will be able to store
+     */
+    CLArray(size_t max_num_values)
+        : ICLArray<T>(max_num_values), _buffer(cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, max_num_values * sizeof(T)))
+    {
+    }
+    /** Enqueue a map operation of the allocated buffer.
+     *
+     * @param[in] blocking If true, then the mapping will be ready to use by the time
+     *                     this method returns, else it is the caller's responsibility
+     *                     to flush the queue and wait for the mapping operation to have completed.
+     */
+    void map(bool blocking = true)
+    {
+        ICLArray<T>::map(CLScheduler::get().queue(), blocking);
+    }
+    using ICLArray<T>::map;
+    /** Enqueue an unmap operation of the allocated and mapped buffer.
+     *
+     * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before
+     *       the memory is accessed by the device.
+     */
+    void unmap()
+    {
+        ICLArray<T>::unmap(CLScheduler::get().queue());
+    }
+    using ICLArray<T>::unmap;
+
+    // Inherited methods overridden:
+    const cl::Buffer &cl_buffer() const override
+    {
+        return _buffer;
+    }
+
+protected:
+    // Inherited methods overridden:
+    uint8_t *do_map(cl::CommandQueue &q, bool blocking) override
+    {
+        ARM_COMPUTE_ERROR_ON(nullptr == _buffer.get());
+        return static_cast<uint8_t *>(q.enqueueMapBuffer(_buffer, blocking ? CL_TRUE : CL_FALSE, CL_MAP_READ | CL_MAP_WRITE, 0, this->max_num_values() * sizeof(T)));
+    }
+    void do_unmap(cl::CommandQueue &q, uint8_t *mapping) override
+    {
+        ARM_COMPUTE_ERROR_ON(nullptr == _buffer.get());
+        q.enqueueUnmapMemObject(_buffer, mapping);
+    }
+
+private:
+    cl::Buffer _buffer;
+};
+
+using CLKeyPointArray        = CLArray<KeyPoint>;
+using CLCoordinates2DArray   = CLArray<Coordinates2D>;
+using CLDetectionWindowArray = CLArray<DetectionWindow>;
+using CLSize2DArray          = CLArray<Size2D>;
+using CLUInt8Array           = CLArray<uint8_t>;
+using CLUInt16Array          = CLArray<uint16_t>;
+using CLUInt32Array          = CLArray<uint32_t>;
+using CLInt16Array           = CLArray<int16_t>;
+using CLInt32Array           = CLArray<int32_t>;
+using CLFloatArray           = CLArray<float>;
+}
+#endif /* __ARM_COMPUTE_CLARRAY_H__ */
diff --git a/arm_compute/runtime/CL/CLDistribution1D.h b/arm_compute/runtime/CL/CLDistribution1D.h
new file mode 100644
index 0000000000..55dd1247ed
--- /dev/null
+++ b/arm_compute/runtime/CL/CLDistribution1D.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
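The map()/unmap() pair is how the host reads the CL-side storage back. A sketch, assuming the array was filled on the device beforehand (for instance by CLFastCorners) and reusing IArray's num_values()/at() accessors:

    CLKeyPointArray corners(10000);
    // ... run a CL function that writes into corners ...

    corners.map(); // blocking map: safe to read as soon as this returns
    for(size_t i = 0; i < corners.num_values(); ++i)
    {
        const KeyPoint &kp = corners.at(i);
        // ... consume kp on the host ...
    }
    corners.unmap();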
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLDISTRIBUTION1D_H__ +#define __ARM_COMPUTE_CLDISTRIBUTION1D_H__ + +#include "arm_compute/core/CL/ICLDistribution1D.h" +#include "arm_compute/core/CL/OpenCL.h" + +#include +#include + +namespace arm_compute +{ +/** CLDistribution1D object class */ +class CLDistribution1D : public ICLDistribution1D +{ +public: + /** Constructor: Creates a 1D CLDistribution of a consecutive interval [offset, offset + range - 1] + * defined by a start offset and valid range, divided equally into num_bins parts. + * + * @param[in] num_bins The number of bins the distribution is divided in. + * @param[in] offset The start of the values to use. + * @param[in] range The total number of the consecutive values of the distribution interval. + */ + CLDistribution1D(size_t num_bins, int32_t offset, uint32_t range); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLDistribution1D(const CLDistribution1D &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLDistribution1D &operator=(const CLDistribution1D &) = delete; + /** Enqueue a map operation of the allocated buffer. + * + * @param[in] blocking If true, then the mapping will be ready to use by the time + * this method returns, else it is the caller's responsibility + * to flush the queue and wait for the mapping operation to have completed. + */ + void map(bool blocking = true); + using ICLDistribution1D::map; + /** Enqueue an unmap operation of the allocated and mapped buffer. + * + * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before + * the memory is accessed by the device. + */ + void unmap(); + using ICLDistribution1D::unmap; + + // Inherited methods overridden: + cl::Buffer &cl_buffer() override; + +protected: + // Inherited methods overridden: + uint32_t *do_map(cl::CommandQueue &q, bool blocking) override; + void do_unmap(cl::CommandQueue &q) override; + +private: + cl::Buffer _mem; +}; +} +#endif /* __ARM_COMPUTE_CLDISTRIBUTION1D_H__ */ diff --git a/arm_compute/runtime/CL/CLFunctions.h b/arm_compute/runtime/CL/CLFunctions.h new file mode 100644 index 0000000000..82929ba139 --- /dev/null +++ b/arm_compute/runtime/CL/CLFunctions.h @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
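A sketch of pairing this object with the CLHistogram function and reading the bins back on the host; the configure() signature and the buffer() accessor from the IDistribution1D base are assumptions, as neither is shown in this hunk:

    CLDistribution1D dist(256, 0, 256); // 256 bins covering pixel values [0, 255]

    CLHistogram hist;
    hist.configure(&image, &dist); // image: a placeholder U8 ICLImage
    hist.run();

    dist.map(); // blocking
    const uint32_t *bins = dist.buffer();
    // ... consume the 256 bin counts ...
    dist.unmap();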
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLFUNCTIONS_H__ +#define __ARM_COMPUTE_CLFUNCTIONS_H__ + +/* Header regrouping all the CL functions */ +#include "arm_compute/runtime/CL/functions/CLAbsoluteDifference.h" +#include "arm_compute/runtime/CL/functions/CLAccumulate.h" +#include "arm_compute/runtime/CL/functions/CLActivationLayer.h" +#include "arm_compute/runtime/CL/functions/CLArithmeticAddition.h" +#include "arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h" +#include "arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h" +#include "arm_compute/runtime/CL/functions/CLBitwiseAnd.h" +#include "arm_compute/runtime/CL/functions/CLBitwiseNot.h" +#include "arm_compute/runtime/CL/functions/CLBitwiseOr.h" +#include "arm_compute/runtime/CL/functions/CLBitwiseXor.h" +#include "arm_compute/runtime/CL/functions/CLBox3x3.h" +#include "arm_compute/runtime/CL/functions/CLCannyEdge.h" +#include "arm_compute/runtime/CL/functions/CLChannelCombine.h" +#include "arm_compute/runtime/CL/functions/CLChannelExtract.h" +#include "arm_compute/runtime/CL/functions/CLColorConvert.h" +#include "arm_compute/runtime/CL/functions/CLConvolution.h" +#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h" +#include "arm_compute/runtime/CL/functions/CLDepthConcatenate.h" +#include "arm_compute/runtime/CL/functions/CLDepthConvert.h" +#include "arm_compute/runtime/CL/functions/CLDerivative.h" +#include "arm_compute/runtime/CL/functions/CLDilate.h" +#include "arm_compute/runtime/CL/functions/CLEqualizeHistogram.h" +#include "arm_compute/runtime/CL/functions/CLErode.h" +#include "arm_compute/runtime/CL/functions/CLFastCorners.h" +#include "arm_compute/runtime/CL/functions/CLFillBorder.h" +#include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h" +#include "arm_compute/runtime/CL/functions/CLGEMM.h" +#include "arm_compute/runtime/CL/functions/CLGEMMInterleave4x4.h" +#include "arm_compute/runtime/CL/functions/CLGEMMLowp.h" +#include "arm_compute/runtime/CL/functions/CLGaussian3x3.h" +#include "arm_compute/runtime/CL/functions/CLGaussian5x5.h" +#include "arm_compute/runtime/CL/functions/CLGaussianPyramid.h" +#include "arm_compute/runtime/CL/functions/CLHOGDescriptor.h" +#include "arm_compute/runtime/CL/functions/CLHOGDetector.h" +#include "arm_compute/runtime/CL/functions/CLHOGGradient.h" +#include "arm_compute/runtime/CL/functions/CLHOGMultiDetection.h" +#include 
"arm_compute/runtime/CL/functions/CLHarrisCorners.h" +#include "arm_compute/runtime/CL/functions/CLHistogram.h" +#include "arm_compute/runtime/CL/functions/CLIntegralImage.h" +#include "arm_compute/runtime/CL/functions/CLLaplacianPyramid.h" +#include "arm_compute/runtime/CL/functions/CLLaplacianReconstruct.h" +#include "arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h" +#include "arm_compute/runtime/CL/functions/CLMagnitude.h" +#include "arm_compute/runtime/CL/functions/CLMeanStdDev.h" +#include "arm_compute/runtime/CL/functions/CLMedian3x3.h" +#include "arm_compute/runtime/CL/functions/CLMinMaxLocation.h" +#include "arm_compute/runtime/CL/functions/CLNonLinearFilter.h" +#include "arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h" +#include "arm_compute/runtime/CL/functions/CLNormalizationLayer.h" +#include "arm_compute/runtime/CL/functions/CLOpticalFlow.h" +#include "arm_compute/runtime/CL/functions/CLPhase.h" +#include "arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h" +#include "arm_compute/runtime/CL/functions/CLPoolingLayer.h" +#include "arm_compute/runtime/CL/functions/CLRemap.h" +#include "arm_compute/runtime/CL/functions/CLScale.h" +#include "arm_compute/runtime/CL/functions/CLScharr3x3.h" +#include "arm_compute/runtime/CL/functions/CLSobel3x3.h" +#include "arm_compute/runtime/CL/functions/CLSobel5x5.h" +#include "arm_compute/runtime/CL/functions/CLSobel7x7.h" +#include "arm_compute/runtime/CL/functions/CLSoftmaxLayer.h" +#include "arm_compute/runtime/CL/functions/CLTableLookup.h" +#include "arm_compute/runtime/CL/functions/CLThreshold.h" +#include "arm_compute/runtime/CL/functions/CLTranspose.h" +#include "arm_compute/runtime/CL/functions/CLWarpAffine.h" +#include "arm_compute/runtime/CL/functions/CLWarpPerspective.h" + +#endif /* __ARM_COMPUTE_CLFUNCTIONS_H__ */ diff --git a/arm_compute/runtime/CL/CLHOG.h b/arm_compute/runtime/CL/CLHOG.h new file mode 100644 index 0000000000..9b4a303eca --- /dev/null +++ b/arm_compute/runtime/CL/CLHOG.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLHOG_H__ +#define __ARM_COMPUTE_CLHOG_H__ + +#include "arm_compute/core/CL/ICLHOG.h" +#include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/core/HOGInfo.h" +#include "arm_compute/core/Types.h" + +#include + +namespace arm_compute +{ +/** OpenCL implementation of HOG data-object */ +class CLHOG : public ICLHOG +{ +public: + /** Default constructor */ + CLHOG(); + /** Allocate the HOG descriptor using the given HOG's metadata + * + * @param[in] input HOG's metadata used to allocate the HOG descriptor + */ + void init(const HOGInfo &input); + + /** Enqueue a map operation of the allocated buffer. + * + * @param[in] blocking If true, then the mapping will be ready to use by the time + * this method returns, else it is the caller's responsibility + * to flush the queue and wait for the mapping operation to have completed. + */ + void map(bool blocking = true); + using ICLHOG::map; + + /** Enqueue an unmap operation of the allocated and mapped buffer. + * + * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before + * the memory is accessed by the device. + */ + void unmap(); + using ICLHOG::unmap; + + // Inherited method overridden: + void free() override; + const HOGInfo *info() const override; + const cl::Buffer &cl_buffer() const override; + +protected: + // Inherited methods overridden: + uint8_t *do_map(cl::CommandQueue &q, bool blocking) override; + void do_unmap(cl::CommandQueue &q) override; + +private: + HOGInfo _info; + cl::Buffer _buffer; +}; +} +#endif /* __ARM_COMPUTE_CLHOG_H__ */ diff --git a/arm_compute/runtime/CL/CLLut.h b/arm_compute/runtime/CL/CLLut.h new file mode 100644 index 0000000000..9bac2b44c3 --- /dev/null +++ b/arm_compute/runtime/CL/CLLut.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLLUT_H__ +#define __ARM_COMPUTE_CLLUT_H__ + +#include "arm_compute/core/CL/ICLLut.h" +#include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLLutAllocator.h" + +#include +#include + +namespace arm_compute +{ +class ILutAllocator; + +/** Basic implementation of the OpenCL lut interface */ +class CLLut : public ICLLut +{ +public: + /** Constructor */ + CLLut(); + /** Constructor: initializes a LUT which can contain num_values values of data_type type. + * + * @param[in] num_elements Number of elements of the LUT. + * @param[in] data_type Data type of each element. + */ + CLLut(size_t num_elements, DataType data_type); + /** Return a pointer to the lut's allocator + * + * @return A pointer to the lut's allocator + */ + ILutAllocator *allocator(); + /** Enqueue a map operation of the allocated buffer. + * + * @param[in] blocking If true, then the mapping will be ready to use by the time + * this method returns, else it is the caller's responsibility + * to flush the queue and wait for the mapping operation to have completed. + */ + void map(bool blocking = true); + using ICLLut::map; + /** Enqueue an unmap operation of the allocated and mapped buffer. + * + * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before + * the memory is accessed by the device. + */ + void unmap(); + using ICLLut::unmap; + + // Inherited methods overridden: + size_t num_elements() const override; + uint32_t index_offset() const override; + size_t size_in_bytes() const override; + DataType type() const override; + const cl::Buffer &cl_buffer() const override; + void clear() override; + +protected: + // Inherited methods overridden: + uint8_t *do_map(cl::CommandQueue &q, bool blocking) override; + void do_unmap(cl::CommandQueue &q) override; + +private: + CLLutAllocator _allocator; /**< Instance of the OpenCL lut allocator */ +}; +} +#endif /*__ARM_COMPUTE_CLLUT_H__ */ diff --git a/arm_compute/runtime/CL/CLLutAllocator.h b/arm_compute/runtime/CL/CLLutAllocator.h new file mode 100644 index 0000000000..4648ffb51f --- /dev/null +++ b/arm_compute/runtime/CL/CLLutAllocator.h @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
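A sketch of filling the table on the host and applying it with CLTableLookup; the buffer() accessor (from the ILut base) and CLTableLookup's (input, lut, output) configure() signature are assumptions, as neither appears in this hunk:

    CLLut lut(256, DataType::U8);

    lut.map(); // blocking
    uint8_t *table = lut.buffer();
    for(int i = 0; i < 256; ++i)
    {
        table[i] = 255 - i; // invert the intensity range
    }
    lut.unmap();

    CLTableLookup lookup;
    lookup.configure(&src, &lut, &dst); // src/dst: placeholder U8 CLImages
    lookup.run();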
+ */ +#ifndef __ARM_COMPUTE_CLLUTALLOCATOR_H__ +#define __ARM_COMPUTE_CLLUTALLOCATOR_H__ + +#include "arm_compute/runtime/ILutAllocator.h" + +#include "arm_compute/core/CL/OpenCL.h" + +#include + +namespace arm_compute +{ +/** Basic implementation of a CL memory LUT allocator. */ +class CLLutAllocator : public ILutAllocator +{ +public: + /** Default constructor. */ + CLLutAllocator(); + /** Default destructor. */ + ~CLLutAllocator() = default; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLLutAllocator(const CLLutAllocator &) = delete; + /** Prevent instances of this class from being copy assigned (As this class contains pointers). */ + const CLLutAllocator &operator=(const CLLutAllocator &) = delete; + /** Interface to be implemented by the child class to return the pointer to the mapped data. */ + uint8_t *data(); + /** Interface to be implemented by the child class to return the pointer to the CL data. */ + const cl::Buffer &cl_data() const; + /** Enqueue a map operation of the allocated buffer on the given queue. + * + * @param[in,out] q The CL command queue to use for the mapping operation. + * @param[in] blocking If true, then the mapping will be ready to use by the time + * this method returns, else it is the caller's responsibility + * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer. + * + * @return The mapping address. + */ + uint8_t *map(cl::CommandQueue &q, bool blocking); + /** Enqueue an unmap operation of the allocated buffer on the given queue. + * + * @note This method simply enqueue the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before + * the memory is accessed by the device. + * + * @param[in,out] q The CL command queue to use for the mapping operation. + * @param[in] mapping The cpu mapping to unmap. + */ + void unmap(cl::CommandQueue &q, uint8_t *mapping); + +protected: + /** Allocate num_elements() * sizeof(type()) of OpenCL memory. */ + void allocate() override; + /** Call map() on the OpenCL buffer. + * + * @return A pointer to the beginning of the LUT's allocation. + */ + uint8_t *lock() override; + /** Call unmap() on the OpenCL buffer. */ + void unlock() override; + +private: + cl::Buffer _buffer; /**< OpenCL buffer containing the LUT data. */ + uint8_t *_mapping; /**< Pointer to the CPU mapping of the OpenCL buffer. */ +}; +} + +#endif /* __ARM_COMPUTE_CLLUTALLOCATOR_H__ */ diff --git a/arm_compute/runtime/CL/CLMultiHOG.h b/arm_compute/runtime/CL/CLMultiHOG.h new file mode 100644 index 0000000000..17bb4e03c1 --- /dev/null +++ b/arm_compute/runtime/CL/CLMultiHOG.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLMULTIHOG_H__
+#define __ARM_COMPUTE_CLMULTIHOG_H__
+
+#include "arm_compute/core/CL/ICLMultiHOG.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLHOG.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+/** Basic implementation of the CL multi HOG data-objects */
+class CLMultiHOG : public ICLMultiHOG
+{
+public:
+    /** Constructor
+     *
+     * @param[in] num_models Number of HOG data objects to contain
+     *
+     */
+    CLMultiHOG(size_t num_models);
+
+    // Inherited methods overridden:
+    size_t num_models() const override;
+    ICLHOG *cl_model(size_t index) override;
+    const ICLHOG *cl_model(size_t index) const override;
+
+private:
+    size_t _num_models;
+    std::unique_ptr<CLHOG[]> _model;
+};
+}
+#endif /*__ARM_COMPUTE_CLMULTIHOG_H__ */
diff --git a/arm_compute/runtime/CL/CLMultiImage.h b/arm_compute/runtime/CL/CLMultiImage.h
new file mode 100644
index 0000000000..f70929db07
--- /dev/null
+++ b/arm_compute/runtime/CL/CLMultiImage.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLMULTIIMAGE_H__
+#define __ARM_COMPUTE_CLMULTIIMAGE_H__
+
+#include "arm_compute/core/CL/ICLMultiImage.h"
+#include "arm_compute/core/MultiImageInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+
+#include <array>
+
+namespace arm_compute
+{
+class ICLTensor;
+using ICLImage = ICLTensor;
+
+/** Basic implementation of the CL multi-planar image interface */
+class CLMultiImage : public ICLMultiImage
+{
+public:
+    /** Constructor */
+    CLMultiImage();
+    /** Init the multi-planar image
+     *
+     * @param[in] width  Width of the whole image
+     * @param[in] height Height of the whole image
+     * @param[in] format Format of the whole image
+     */
+    void init(unsigned int width, unsigned int height, Format format);
+    /** Init the multi-planar image
+     *
+     * @note Uses conservative padding strategy which fits all kernels.
+ *
+ * @param[in] width  Width of the whole image
+ * @param[in] height Height of the whole image
+ * @param[in] format Format of the whole image
+ */
+    void init_auto_padding(unsigned int width, unsigned int height, Format format);
+    /** Allocate a previously initialised multi image
+     *
+     * @note The multi image must not already be allocated when calling this function.
+     *
+     **/
+    void allocate();
+
+    // Inherited methods overridden:
+    const MultiImageInfo *info() const override;
+    CLImage *cl_plane(unsigned int index) override;
+    const CLImage *cl_plane(unsigned int index) const override;
+
+private:
+    /** Init the multi-planar image
+     *
+     * @param[in] width        Width of the whole image
+     * @param[in] height       Height of the whole image
+     * @param[in] format       Format of the whole image
+     * @param[in] auto_padding Specifies whether the image uses auto padding
+     */
+    void internal_init(unsigned int width, unsigned int height, Format format, bool auto_padding);
+
+    MultiImageInfo _info;           /**< Instance of the multi-planar image's metadata */
+    std::array<CLTensor, 3> _plane; /**< Instances of CLImage to hold the image planes */
+};
+}
+#endif /*__ARM_COMPUTE_CLMULTIIMAGE_H__ */
diff --git a/arm_compute/runtime/CL/CLPyramid.h b/arm_compute/runtime/CL/CLPyramid.h
new file mode 100644
index 0000000000..5e0afb3c63
--- /dev/null
+++ b/arm_compute/runtime/CL/CLPyramid.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLPYRAMID_H__
+#define __ARM_COMPUTE_CLPYRAMID_H__
+
+#include "arm_compute/core/IPyramid.h"
+#include "arm_compute/core/PyramidInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+
+#include <cstddef>
+#include <memory>
+
+namespace arm_compute
+{
+class CLTensor;
+
+/** Basic implementation of the OpenCL pyramid interface */
+class CLPyramid : public IPyramid<CLTensor>
+{
+public:
+    /** Default constructor */
+    CLPyramid();
+    /** Initialize pyramid data-object using the given Pyramid's metadata
+     *
+     * @param[in] info Pyramid's metadata
+     */
+    void init(const PyramidInfo &info);
+
+    /** Initialize pyramid data-object using the given Pyramid's metadata
+     *
+     * @note Uses conservative padding strategy which fits all kernels.
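A minimal allocation sketch for a two-plane format (the dimensions are placeholders):

    CLMultiImage nv12;
    nv12.init(640, 480, Format::NV12);
    nv12.allocate();

    CLImage *y_plane  = nv12.cl_plane(0); // full-resolution luma plane
    CLImage *uv_plane = nv12.cl_plane(1); // sub-sampled interleaved chroma plane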
+ *
+ * @param[in] info Pyramid's metadata
+ */
+    void init_auto_padding(const PyramidInfo &info);
+
+    /** Allocate the planes in the pyramid
+     *
+     * @note The pyramid must not already be allocated when calling this function.
+     *
+     **/
+    void allocate();
+
+    // Inherited method overridden
+    const PyramidInfo *info() const override;
+    CLTensor *get_pyramid_level(size_t index) const override;
+
+private:
+    /** Initialize pyramid data-object using the given Pyramid's metadata
+     *
+     * @param[in] info         Pyramid's metadata
+     * @param[in] auto_padding Specifies whether the images in the pyramid use auto padding
+     */
+    void internal_init(const PyramidInfo &info, bool auto_padding);
+
+    PyramidInfo _info;
+    std::unique_ptr<CLTensor[]> _pyramid;
+};
+}
+#endif /*__ARM_COMPUTE_CLPYRAMID_H__ */
diff --git a/arm_compute/runtime/CL/CLScheduler.h b/arm_compute/runtime/CL/CLScheduler.h
new file mode 100644
index 0000000000..8e80259b59
--- /dev/null
+++ b/arm_compute/runtime/CL/CLScheduler.h
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLSCHEDULER_H__
+#define __ARM_COMPUTE_CLSCHEDULER_H__
+
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/CL/CLTypes.h"
+#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ICLKernel;
+
+/** Provides global access to a CL context and command queue. */
+class CLScheduler
+{
+private:
+    /** Constructor */
+    CLScheduler();
+
+public:
+    /** Access the scheduler singleton.
+     *
+     * @return The scheduler
+     */
+    static CLScheduler &get();
+    /** Initialises the context and command queue used by the scheduler to default values
+     *  and sets a default device and kernel path for the @ref CLKernelLibrary.
+     */
+    void default_init()
+    {
+        CLKernelLibrary::get().init("./cl_kernels/", cl::Context::getDefault(), cl::Device::getDefault());
+        init(cl::Context::getDefault(), cl::CommandQueue::getDefault(), cl::Device::getDefault());
+    }
+    /** Schedule the execution of the passed kernel if possible.
+     *
+     * @param[in] kernel Kernel to execute.
+     * @param[in] flush  (Optional) Specifies if the command queue will be flushed after running the kernel.
+     */
+    void enqueue(ICLKernel &kernel, bool flush = true);
+
+    /** Initialises the context and command queue to be used by the scheduler.
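A sketch of building a half-scale pyramid; PyramidInfo's (num_levels, scale, width, height, format) constructor and the SCALE_PYRAMID_HALF constant are assumed from core/PyramidInfo.h and core/Types.h, neither of which is in this hunk:

    PyramidInfo info(4, SCALE_PYRAMID_HALF, 640, 480, Format::U8);

    CLPyramid pyramid;
    pyramid.init(info);
    pyramid.allocate();

    CLTensor *level0 = pyramid.get_pyramid_level(0); // 640x480; each level halves the size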
+ * + * @param[in] context A CL context. + * @param[in] queue A CL command queue. + * @param[in] device A CL device. + */ + void init(cl::Context context = cl::Context::getDefault(), cl::CommandQueue queue = cl::CommandQueue::getDefault(), + cl::Device device = cl::Device::getDefault()) + { + _context = std::move(context); + _queue = std::move(queue); + _target = get_target_from_device(device); + } + + /** Accessor for the associated CL context. + * + * @return A CL context. + */ + cl::Context &context() + { + return _context; + } + + /** Accessor to set the CL context to be used by the scheduler. + * + * @param[in] context A CL context. + */ + void set_context(cl::Context context) + { + _context = std::move(context); + } + + /** Accessor for the associated CL command queue. + * + * @return A CL command queue. + */ + cl::CommandQueue &queue() + { + return _queue; + } + + /** Get the target GPU. + * + * @return The target GPU. + */ + GPUTarget target() const + { + return _target; + } + + /** Accessor to set the CL command queue to be used by the scheduler. + * + * @param[in] queue A CL command queue. + */ + void set_queue(cl::CommandQueue queue) + { + _queue = std::move(queue); + } + + /** Accessor to set target GPU to be used by the scheduler. + * + * @param[in] target The target GPU. + */ + void set_target(GPUTarget target) + { + _target = target; + } + + /** Blocks until all commands in the associated command queue have finished. */ + void sync() + { + _queue.finish(); + } + + /** Enqueues a marker into the associated command queue and return the event. + * + * @return An event that can be waited on to block the executing thread. + */ + cl::Event enqueue_sync_event() + { + cl::Event event; + _queue.enqueueMarker(&event); + + return event; + } + +private: + cl::Context _context; + cl::CommandQueue _queue; + GPUTarget _target; +}; +} +#endif /* __ARM_COMPUTE_CLSCHEDULER_H__ */ diff --git a/arm_compute/runtime/CL/CLSubTensor.h b/arm_compute/runtime/CL/CLSubTensor.h new file mode 100644 index 0000000000..4bab164779 --- /dev/null +++ b/arm_compute/runtime/CL/CLSubTensor.h @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
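In application code the scheduler is usually touched in exactly two places: once to initialise it before any function is configured, and once to synchronise before results are read back:

    CLScheduler::get().default_init(); // once, at start-up

    // ... configure and run() CL functions ...

    CLScheduler::get().sync(); // block until all enqueued CL work has finished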
+ */ +#ifndef __ARM_COMPUTE_CLSUBTENSOR_H__ +#define __ARM_COMPUTE_CLSUBTENSOR_H__ + +#include "arm_compute/core/SubTensorInfo.h" +#include "arm_compute/runtime/CL/CLTensor.h" + +#include + +namespace arm_compute +{ +class ITensorInfo; + +/** Basic implementation of the OpenCL sub-tensor interface */ +class CLSubTensor : public ICLTensor +{ +public: + /** Constructor + * + * @param[in] parent Parent tensor + * @param[in] tensor_shape Shape of the subtensor + * @param[in] coords Coordinates of the first subtensor element inside the parent tensor. + */ + CLSubTensor(ICLTensor *parent, const TensorShape &tensor_shape, const Coordinates &coords); + /** Destructor: free the tensor's memory */ + ~CLSubTensor() = default; + /** Restrict instances of this class to be copy constructed */ + CLSubTensor(const CLSubTensor &) = delete; + /** Restrict instances of this class to be copied */ + CLSubTensor &operator=(const CLSubTensor &) = delete; + /** Allow instances of this class to be move constructed */ + CLSubTensor(CLSubTensor &&) = default; + /** Allow instances of this class to be moved */ + CLSubTensor &operator=(CLSubTensor &&) = default; + + /** Enqueue a map operation of the allocated buffer. + * + * @note Mapping a subtensor will lead to the mapping of the whole parent tensor for now. + * + * @param[in] blocking If true, then the mapping will be ready to use by the time + * this method returns, else it is the caller's responsibility + * to flush the queue and wait for the mapping operation to have completed. + */ + void map(bool blocking = true); + using ICLTensor::map; + /** Enqueue an unmap operation of the allocated and mapped buffer. + * + * @note Unmapping a subtensor will lead to the unmapping of the whole parent tensor for now. + * + * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before + * the memory is accessed by the device. + */ + void unmap(); + using ICLTensor::unmap; + + /** Return the parent tensor of the subtensor + * + * @return Parent tensor + */ + ICLTensor *parent(); + + // Inherited methods overridden: + ITensorInfo *info() const override; + ITensorInfo *info() override; + const cl::Buffer &cl_buffer() const override; + +protected: + // Inherited methods overridden: + uint8_t *do_map(cl::CommandQueue &q, bool blocking) override; + void do_unmap(cl::CommandQueue &q) override; + +private: + ICLTensor *_parent; + mutable SubTensorInfo _info; +}; +} +#endif /*__ARM_COMPUTE_CLSUBTENSOR_H__ */ diff --git a/arm_compute/runtime/CL/CLTensor.h b/arm_compute/runtime/CL/CLTensor.h new file mode 100644 index 0000000000..2c685d1ed1 --- /dev/null +++ b/arm_compute/runtime/CL/CLTensor.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
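A sketch of viewing a slice of an existing tensor without copying; the shape, coordinates and TensorInfo constructor used here are illustrative placeholders:

    CLTensor parent;
    parent.allocator()->init(TensorInfo(TensorShape(64U, 64U, 16U), 1, DataType::F32));
    parent.allocator()->allocate();

    // An 8-channel view starting at channel 4; it shares the parent's buffer:
    CLSubTensor view(&parent, TensorShape(64U, 64U, 8U), Coordinates(0, 0, 4));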
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLTENSOR_H__ +#define __ARM_COMPUTE_CLTENSOR_H__ + +#include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/runtime/CL/CLTensorAllocator.h" + +#include + +namespace arm_compute +{ +class ITensorAllocator; +class ITensorInfo; + +/** Basic implementation of the OpenCL tensor interface */ +class CLTensor : public ICLTensor +{ +public: + /** Constructor */ + CLTensor(); + /** Return a pointer to the tensor's allocator + * + * @return A pointer to the tensor's allocator + */ + ITensorAllocator *allocator(); + /** Enqueue a map operation of the allocated buffer. + * + * @param[in] blocking If true, then the mapping will be ready to use by the time + * this method returns, else it is the caller's responsibility + * to flush the queue and wait for the mapping operation to have completed. + */ + void map(bool blocking = true); + using ICLTensor::map; + /** Enqueue an unmap operation of the allocated and mapped buffer. + * + * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before + * the memory is accessed by the device. + */ + void unmap(); + using ICLTensor::unmap; + + // Inherited methods overridden: + TensorInfo *info() const override; + TensorInfo *info() override; + const cl::Buffer &cl_buffer() const override; + +protected: + // Inherited methods overridden: + uint8_t *do_map(cl::CommandQueue &q, bool blocking) override; + void do_unmap(cl::CommandQueue &q) override; + +private: + mutable CLTensorAllocator _allocator; /**< Instance of the OpenCL tensor allocator */ +}; + +using CLImage = CLTensor; +} +#endif /*__ARM_COMPUTE_CLTENSOR_H__ */ diff --git a/arm_compute/runtime/CL/CLTensorAllocator.h b/arm_compute/runtime/CL/CLTensorAllocator.h new file mode 100644 index 0000000000..ed371e0642 --- /dev/null +++ b/arm_compute/runtime/CL/CLTensorAllocator.h @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
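The usual CLTensor lifecycle is init, allocate, then map()/unmap() around any host-side access; buffer() is assumed from the ITensor base, which is not part of this hunk:

    CLTensor tensor;
    tensor.allocator()->init(TensorInfo(TensorShape(256U, 256U), 1, DataType::F32));
    tensor.allocator()->allocate();

    tensor.map(); // blocking map
    auto *data = reinterpret_cast<float *>(tensor.buffer());
    data[0] = 1.0f;
    tensor.unmap();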
diff --git a/arm_compute/runtime/CL/CLTensorAllocator.h b/arm_compute/runtime/CL/CLTensorAllocator.h new file mode 100644 index 0000000000..ed371e0642 --- /dev/null +++ b/arm_compute/runtime/CL/CLTensorAllocator.h @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLTENSORALLOCATOR_H__ +#define __ARM_COMPUTE_CLTENSORALLOCATOR_H__ + +#include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/runtime/ITensorAllocator.h" + +#include <cstdint> + +namespace arm_compute +{ +/** Basic implementation of a CL memory tensor allocator. */ +class CLTensorAllocator : public ITensorAllocator +{ +public: + /** Default constructor. */ + CLTensorAllocator(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLTensorAllocator(const CLTensorAllocator &) = delete; + /** Prevent instances of this class from being copy assigned (As this class contains pointers). */ + CLTensorAllocator &operator=(const CLTensorAllocator &) = delete; + /** Allow instances of this class to be moved */ + CLTensorAllocator(CLTensorAllocator &&) = default; + /** Allow instances of this class to be moved */ + CLTensorAllocator &operator=(CLTensorAllocator &&) = default; + /** Default destructor */ + ~CLTensorAllocator() = default; + + /** Return a pointer to the CPU mapping of the tensor data. */ + uint8_t *data(); + /** Return the OpenCL buffer containing the tensor data. */ + const cl::Buffer &cl_data() const; + /** Enqueue a map operation of the allocated buffer on the given queue. + * + * @param[in,out] q The CL command queue to use for the mapping operation. + * @param[in] blocking If true, then the mapping will be ready to use by the time + * this method returns, else it is the caller's responsibility + * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer. + * + * @return The mapping address. + */ + uint8_t *map(cl::CommandQueue &q, bool blocking); + /** Enqueue an unmap operation of the allocated buffer on the given queue. + * + * @note This method simply enqueues the unmap operation; it is the caller's responsibility to flush the queue and make sure the unmap is finished before + * the memory is accessed by the device. + * + * @param[in,out] q The CL command queue to use for the unmap operation. + * @param[in] mapping The CPU mapping to unmap. + */ + void unmap(cl::CommandQueue &q, uint8_t *mapping); + + /** Allocate OpenCL memory of the size specified by the tensor's TensorInfo. + * + * @note The tensor must not already be allocated when calling this function. + * + */ + void allocate() override; + + /** Free allocated OpenCL memory. + * + * @note The tensor must have been allocated when calling this function. + * + */ + void free() override; + +protected: + /** Call map() on the OpenCL buffer. + * + * @return A pointer to the beginning of the tensor's allocation. + */ + uint8_t *lock() override; + /** Call unmap() on the OpenCL buffer. */ + void unlock() override; + +private: + cl::Buffer _buffer; /**< OpenCL buffer containing the tensor data. */ + uint8_t *_mapping; /**< Pointer to the CPU mapping of the OpenCL buffer. */
+}; +} +#endif /* __ARM_COMPUTE_CLTENSORALLOCATOR_H__ */ diff --git a/arm_compute/runtime/CL/ICLSimpleFunction.h b/arm_compute/runtime/CL/ICLSimpleFunction.h new file mode 100644 index 0000000000..130c58a98c --- /dev/null +++ b/arm_compute/runtime/CL/ICLSimpleFunction.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_ICLSIMPLEFUNCTION_H__ +#define __ARM_COMPUTE_ICLSIMPLEFUNCTION_H__ + +#include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" +#include "arm_compute/runtime/IFunction.h" + +#include <memory> + +namespace arm_compute +{ +/** Basic interface for functions which have a single OpenCL kernel */ +class ICLSimpleFunction : public IFunction +{ +public: + /** Default constructor */ + ICLSimpleFunction(); + + // Inherited methods overridden: + void run() override final; + +protected: + std::unique_ptr<ICLKernel> _kernel; /**< Kernel to run */ + CLFillBorderKernel _border_handler; /**< Kernel to handle borders */ +}; +} +#endif /*__ARM_COMPUTE_ICLSIMPLEFUNCTION_H__ */
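Illustrative sketch (an assumption, not library source): how a derived function typically uses this interface. configure() instantiates the kernel, and the inherited run() then enqueues _border_handler followed by _kernel on the scheduler's queue. The filter class and its kernel choice are hypothetical.

```cpp
#include "arm_compute/core/CL/kernels/CLBox3x3Kernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"

#include <memory>

namespace arm_compute
{
class CLMyBoxFilter : public ICLSimpleFunction
{
public:
    void configure(ICLTensor *input, ICLTensor *output)
    {
        // Create and configure the single kernel this function wraps
        std::unique_ptr<CLBox3x3Kernel> k(new CLBox3x3Kernel());
        k->configure(input, output, false /* border_undefined */);
        _kernel = std::move(k);
        // Fill the border region the kernel will read
        _border_handler.configure(input, _kernel->border_size(), BorderMode::REPLICATE);
    }
};
}
```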
diff --git a/arm_compute/runtime/CL/functions/CLAbsoluteDifference.h b/arm_compute/runtime/CL/functions/CLAbsoluteDifference.h new file mode 100644 index 0000000000..40ee396644 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLAbsoluteDifference.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLABSOLUTEDIFFERENCE_H__ +#define __ARM_COMPUTE_CLABSOLUTEDIFFERENCE_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLAbsoluteDifferenceKernel + * + * @note The tensor data types for the inputs must be U8 or S16. + * @note The function also calculates the absolute difference when the two inputs have different data types. + */ +class CLAbsoluteDifference : public ICLSimpleFunction +{ +public: + /** Initialize the function + * + * @param[in] input1 First input tensor. Data types supported: U8, S16 + * @param[in] input2 Second input tensor. Data types supported: U8, S16 + * @param[out] output Output tensor. Data types supported: U8, S16 + */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); +}; +} +#endif /* __ARM_COMPUTE_CLABSOLUTEDIFFERENCE_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLAccumulate.h b/arm_compute/runtime/CL/functions/CLAccumulate.h new file mode 100644 index 0000000000..51f6df9acb --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLAccumulate.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLACCUMULATE_H__ +#define __ARM_COMPUTE_CLACCUMULATE_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +#include <cstdint> + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLAccumulateKernel */ +class CLAccumulate : public ICLSimpleFunction +{ +public: + /** Set the input and accumulation tensors. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] accum Destination tensor. Data types supported: S16. + */ + void configure(const ICLTensor *input, ICLTensor *accum); +}; + +/** Basic function to run @ref CLAccumulateWeightedKernel */ +class CLAccumulateWeighted : public ICLSimpleFunction +{ +public: + /** Set the input and accumulation tensors, and the scale value. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[in] alpha The input scalar value, within the range [0, 1.0]. Data types supported: F32. + * @param[in,out] accum Accumulated tensor. Data types supported: U8. + */ + void configure(const ICLTensor *input, float alpha, ICLTensor *accum); +}; + +/** Basic function to run @ref CLAccumulateSquaredKernel */ +class CLAccumulateSquared : public ICLSimpleFunction +{ +public: + /** Set the input and accumulation tensors and the shift value. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[in] shift The shift value, within the range [0, 15]. Data types supported: U32. + * @param[in,out] accum Accumulated tensor. Data types supported: S16. + */ + void configure(const ICLTensor *input, uint32_t shift, ICLTensor *accum); +}; +} +#endif /*__ARM_COMPUTE_CLACCUMULATE_H__ */
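Illustrative usage sketch (not part of the patch): a running average built with CLAccumulateWeighted. The tensors and the 0.5 weight are hypothetical.

```cpp
#include "arm_compute/runtime/CL/functions/CLAccumulate.h"

using namespace arm_compute;

void accumulate_sketch(const ICLTensor *frame, ICLTensor *average)
{
    CLAccumulateWeighted acc;
    // average = (1 - 0.5) * average + 0.5 * frame
    acc.configure(frame, 0.5f, average);
    acc.run();
}
```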
diff --git a/arm_compute/runtime/CL/functions/CLActivationLayer.h b/arm_compute/runtime/CL/functions/CLActivationLayer.h new file mode 100644 index 0000000000..6468c996a2 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLActivationLayer.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLACTIVATIONLAYER_H__ +#define __ARM_COMPUTE_CLACTIVATIONLAYER_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLActivationLayerKernel + * + * @note The function applies the activation function specified in @p act_info to the input tensor. + */ +class CLActivationLayer : public ICLSimpleFunction +{ +public: + /** Set the input and output tensor. + * + * @param[in] input Source tensor. Data types supported: F16, F32, U16, S16. + * @param[out] output Destination tensor. Data type should match the input data type. + * @param[in] act_info Activation layer parameters. + */ + void configure(const ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info); +}; +} +#endif /* __ARM_COMPUTE_CLACTIVATIONLAYER_H__ */
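Illustrative usage sketch (not part of the patch): configure and run a ReLU activation. The choice of activation function is hypothetical.

```cpp
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"

using namespace arm_compute;

void activation_sketch(const ICLTensor *input, ICLTensor *output)
{
    CLActivationLayer act;
    act.configure(input, output,
                  ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
    act.run(); // enqueues the activation kernel
}
```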
diff --git a/arm_compute/runtime/CL/functions/CLArithmeticAddition.h b/arm_compute/runtime/CL/functions/CLArithmeticAddition.h new file mode 100644 index 0000000000..feadf39820 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLArithmeticAddition.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLARITHMETICADDITION_H__ +#define __ARM_COMPUTE_CLARITHMETICADDITION_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLArithmeticAdditionKernel + * + * @note The tensor data type for the inputs must be U8, S16, F16, F32. + * @note The function performs an arithmetic addition between two tensors. + */ +class CLArithmeticAddition : public ICLSimpleFunction +{ +public: + /** Initialise the kernel's inputs, output and conversion policy. + * + * @param[in] input1 First tensor input. Data types supported: U8, S16, F16, F32. + * @param[in] input2 Second tensor input. Data types supported: U8, S16, F16, F32. + * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), S16, F16, F32. + * @param[in] policy Policy to use to handle overflow. + */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ConvertPolicy policy); +}; +} +#endif /* __ARM_COMPUTE_CLARITHMETICADDITION_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h b/arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h new file mode 100644 index 0000000000..d7bb21144e --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLARITHMETICSUBTRACTION_H__ +#define __ARM_COMPUTE_CLARITHMETICSUBTRACTION_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLArithmeticSubtractionKernel + * + * @note The tensor data type for the inputs must be U8, S16, F16, F32. + * @note The function performs an arithmetic subtraction between two tensors. + */ +class CLArithmeticSubtraction : public ICLSimpleFunction +{ +public: + /** Initialise the kernel's inputs, output and conversion policy. + * + * @param[in] input1 First tensor input. Data types supported: U8, S16, F16, F32. + * @param[in] input2 Second tensor input. Data types supported: U8, S16, F16, F32. + * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), S16, F16, F32. + * @param[in] policy Policy to use to handle overflow. + */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ConvertPolicy policy); +}; +} +#endif /* __ARM_COMPUTE_CLARITHMETICSUBTRACTION_H__ */
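Illustrative usage sketch (not part of the patch): element-wise addition with saturation on overflow. The same pattern applies to CLArithmeticSubtraction.

```cpp
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/functions/CLArithmeticAddition.h"

using namespace arm_compute;

void addition_sketch(const ICLTensor *a, const ICLTensor *b, ICLTensor *sum)
{
    CLArithmeticAddition add;
    add.configure(a, b, sum, ConvertPolicy::SATURATE); // clamp instead of wrapping
    add.run();
}
```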
diff --git a/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h new file mode 100644 index 0000000000..d766d1c69c --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLBATCHNORMALIZATIONLAYER_H__ +#define __ARM_COMPUTE_CLBATCHNORMALIZATIONLAYER_H__ + +#include "arm_compute/runtime/IFunction.h" + +#include "arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLBatchNormalizationLayerKernel and simulate a batch normalization layer. + * + * Batch normalization is calculated by: + * @f[ out_i = \gamma * (\frac{in_i - \mu_{B}}{\sqrt{\sigma^2_{B} + \epsilon}}) + \beta \equiv BN_{\gamma,\beta}(in_i) @f] + * + */ +class CLBatchNormalizationLayer : public IFunction +{ +public: + /** Default constructor */ + CLBatchNormalizationLayer(); + /** Set the input and output tensors. + * + * @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, FM]. + * The rest are optional and used for representing batches. Data types supported: F32. + * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input + * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] beta Beta values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] gamma Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] epsilon Small value to avoid division with zero. + */ + void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta, const ICLTensor *gamma, float epsilon); + + // Inherited methods overridden: + void run() override; + +private: + CLBatchNormalizationLayerKernel _norm_kernel; /**< BatchNormalization layer kernel to run */ +}; +} +#endif /* __ARM_COMPUTE_CLBATCHNORMALIZATIONLAYER_H__ */
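Illustrative usage sketch (not part of the patch): wiring up batch normalization for a [width, height, FM] input. All tensors are assumed to be initialised and allocated F32 CLTensors of the appropriate shapes; the epsilon value is hypothetical.

```cpp
#include "arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h"

using namespace arm_compute;

void batchnorm_sketch(const ICLTensor *input, ICLTensor *output,
                      const ICLTensor *mean, const ICLTensor *var,
                      const ICLTensor *beta, const ICLTensor *gamma)
{
    CLBatchNormalizationLayer bn;
    bn.configure(input, output, mean, var, beta, gamma, 1e-5f /* epsilon */);
    bn.run();
}
```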
diff --git a/arm_compute/runtime/CL/functions/CLBitwiseAnd.h b/arm_compute/runtime/CL/functions/CLBitwiseAnd.h new file mode 100644 index 0000000000..a4a523baaa --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLBitwiseAnd.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLBITWISEAND_H__ +#define __ARM_COMPUTE_CLBITWISEAND_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLBitwiseAndKernel. + * + * @note The tensor data type for the inputs must be U8. + * @note The function performs a bitwise AND operation using the two input tensors. + */ +class CLBitwiseAnd : public ICLSimpleFunction +{ +public: + /** Initialize the function + * + * @param[in] input1 Input tensor. Data types supported: U8. + * @param[in] input2 Input tensor. Data types supported: U8. + * @param[out] output Output tensor. Data types supported: U8. + */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); +}; +} +#endif /* __ARM_COMPUTE_CLBITWISEAND_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLBitwiseNot.h b/arm_compute/runtime/CL/functions/CLBitwiseNot.h new file mode 100644 index 0000000000..0ff16af870 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLBitwiseNot.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLBITWISENOT_H__ +#define __ARM_COMPUTE_CLBITWISENOT_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLBitwiseNotKernel. + * + * @note The tensor data type for the inputs must be U8. + * @note The function performs a bitwise NOT operation on the input tensor. + */ +class CLBitwiseNot : public ICLSimpleFunction +{ +public: + /** Initialize the function + * + * @param[in] input Input tensor. Data types supported: U8. + * @param[out] output Output tensor. Data types supported: U8. + */ + void configure(const ICLTensor *input, ICLTensor *output); +}; +} +#endif /* __ARM_COMPUTE_CLBITWISENOT_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLBitwiseOr.h b/arm_compute/runtime/CL/functions/CLBitwiseOr.h new file mode 100644 index 0000000000..880c4762be --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLBitwiseOr.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLBITWISEOR_H__ +#define __ARM_COMPUTE_CLBITWISEOR_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLBitwiseOrKernel. + * + * @note The tensor data type for the inputs must be U8. + * @note The function performs a bitwise OR operation using the two input tensors. + */ +class CLBitwiseOr : public ICLSimpleFunction +{ +public: + /** Initialize the function + * + * @param[in] input1 Input tensor. Data types supported: U8. + * @param[in] input2 Input tensor. Data types supported: U8. + * @param[out] output Output tensor. Data types supported: U8. + */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); +}; +} +#endif /* __ARM_COMPUTE_CLBITWISEOR_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLBitwiseXor.h b/arm_compute/runtime/CL/functions/CLBitwiseXor.h new file mode 100644 index 0000000000..772dec22ea --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLBitwiseXor.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLBITWISEXOR_H__ +#define __ARM_COMPUTE_CLBITWISEXOR_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLBitwiseXorKernel. + * + * @note The tensor data type for the inputs must be U8. + * @note The function performs a bitwise XOR operation using the two input tensors. + */ +class CLBitwiseXor : public ICLSimpleFunction +{ +public: + /** Initialize the function + * + * @param[in] input1 Input tensor. Data types supported: U8. + * @param[in] input2 Input tensor. Data types supported: U8. + * @param[out] output Output tensor. Data types supported: U8. 
+ */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); +}; +} +#endif /* __ARM_COMPUTE_CLBITWISEXOR_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLBox3x3.h b/arm_compute/runtime/CL/functions/CLBox3x3.h new file mode 100644 index 0000000000..5e51c1a390 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLBox3x3.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLBOX3X3_H__ +#define __ARM_COMPUTE_CLBOX3X3_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +#include <cstdint> + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to execute box filter 3x3. This function calls the following OpenCL kernels: + * + * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref CLBox3x3Kernel + * + */ +class CLBox3x3 : public ICLSimpleFunction +{ +public: + /** Initialise the function's source, destination and border mode. + * + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor. Data types supported: U8. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0); +}; +} +#endif /*__ARM_COMPUTE_CLBOX3X3_H__ */
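Illustrative usage sketch (not part of the patch): a 3x3 box filter with replicated borders, so the output stays valid at the image edges.

```cpp
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/functions/CLBox3x3.h"

using namespace arm_compute;

void box_filter_sketch(ICLTensor *input, ICLTensor *output)
{
    CLBox3x3 box;
    box.configure(input, output, BorderMode::REPLICATE);
    box.run();
}
```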
diff --git a/arm_compute/runtime/CL/functions/CLCannyEdge.h b/arm_compute/runtime/CL/functions/CLCannyEdge.h new file mode 100644 index 0000000000..e5a82b2263 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLCannyEdge.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLCANNYEDGE_H__ +#define __ARM_COMPUTE_CLCANNYEDGE_H__ + +#include "arm_compute/runtime/IFunction.h" + +#include "arm_compute/core/CL/kernels/CLCannyEdgeKernel.h" +#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" +#include "arm_compute/runtime/CL/CLTensor.h" + +#include <memory> + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to execute the Canny edge detector on OpenCL. This function calls the following OpenCL kernels and functions: + * + * -# @ref CLFillBorderKernel (if border_mode == REPLICATE or border_mode == CONSTANT) + * -# @ref CLSobel3x3 (if gradient_size == 3) or @ref CLSobel5x5 (if gradient_size == 5) or @ref CLSobel7x7 (if gradient_size == 7) + * -# @ref CLGradientKernel + * -# @ref CLEdgeNonMaxSuppressionKernel + * -# @ref CLEdgeTraceKernel + * + */ +class CLCannyEdge : public IFunction +{ +public: + /** Constructor */ + CLCannyEdge(); + /** Initialise the function's source, destination, thresholds, gradient size, normalization type and border mode. + * + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for border_mode != UNDEFINED) + * @param[out] output Destination tensor. Data types supported: U8. + * @param[in] upper_thr Upper threshold used for the hysteresis. + * @param[in] lower_thr Lower threshold used for the hysteresis. + * @param[in] gradient_size Gradient size (3, 5 or 7). + * @param[in] norm_type Normalization type. If 1, L1-Norm is used, otherwise L2-Norm. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr, int32_t gradient_size, int32_t norm_type, + BorderMode border_mode, uint8_t constant_border_value = 0); + + // Inherited methods overridden: + virtual void run() override; + +private: + std::unique_ptr<IFunction> _sobel; /**< Pointer to the Sobel function. */ + CLGradientKernel _gradient; /**< Gradient kernel. */ + CLFillBorderKernel _border_mag_gradient; /**< Fill border on magnitude tensor kernel */ + CLEdgeNonMaxSuppressionKernel _non_max_suppr; /**< Non-Maxima suppression kernel. */ + CLEdgeTraceKernel _edge_trace; /**< Edge tracing kernel. */ + CLImage _gx; /**< Source tensor - Gx component.
*/ + CLImage _mag; /**< Source tensor - Magnitude. */ + CLImage _phase; /**< Source tensor - Phase. */ + CLImage _nonmax; /**< Source tensor - Non-Maxima suppressed. */ + CLImage _visited, _recorded, _l1_list_counter, _l1_stack; /**< Temporary tensors */ +}; +} + +#endif /* __ARM_COMPUTE_CLCANNYEDGE_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLChannelCombine.h b/arm_compute/runtime/CL/functions/CLChannelCombine.h new file mode 100644 index 0000000000..337e6b4820 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLChannelCombine.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLCHANNELCOMBINE_H__ +#define __ARM_COMPUTE_CLCHANNELCOMBINE_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLMultiImage; +class ICLTensor; +using ICLImage = ICLTensor; + +/** Basic function to run @ref CLChannelCombineKernel to perform channel combination. */ +class CLChannelCombine : public ICLSimpleFunction +{ +public: + /** Initialize function's inputs and outputs. + * + * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format. + * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format. + * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format. + * @param[in] plane3 The 2D plane that forms channel 3. Must be of U8 format. + * @param[out] output The single planar output tensor. + */ + void configure(const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output); + /** Initialize function's inputs and outputs. + * + * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format. + * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format. + * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format. + * @param[out] output The multi planar output image. + */ + void configure(const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output); +}; +} +#endif /*__ARM_COMPUTE_CLCHANNELCOMBINE_H__*/ diff --git a/arm_compute/runtime/CL/functions/CLChannelExtract.h b/arm_compute/runtime/CL/functions/CLChannelExtract.h new file mode 100644 index 0000000000..1753374622 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLChannelExtract.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLCHANNELEXTRACT_H__ +#define __ARM_COMPUTE_CLCHANNELEXTRACT_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLMultiImage; +class ICLTensor; +using ICLImage = ICLTensor; + +/** Basic function to run @ref CLChannelExtractKernel to perform channel extraction. */ +class CLChannelExtract : public ICLSimpleFunction +{ +public: + /** Initialize the function's source, destination + * + * @param[in] input The input tensor to extract the channel from. Formats supported: Any single planar. + * @param[in] channel The channel to extract. + * @param[out] output The extracted channel. Must be of U8 format. + */ + void configure(const ICLTensor *input, Channel channel, ICLTensor *output); + /** Initialize the function's source, destination + * + * @param[in] input The multi-planar input image to extract channel from. + * @param[in] channel The channel to extract. + * @param[out] output The extracted 2D channel. Must be of U8 format. + */ + void configure(const ICLMultiImage *input, Channel channel, ICLImage *output); +}; +} +#endif /*__ARM_COMPUTE_CLCHANNELEXTRACT_H__*/ diff --git a/arm_compute/runtime/CL/functions/CLColorConvert.h b/arm_compute/runtime/CL/functions/CLColorConvert.h new file mode 100644 index 0000000000..12457a0cf2 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLColorConvert.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLCOLORCONVERT_H__ +#define __ARM_COMPUTE_CLCOLORCONVERT_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLMultiImage; +class ICLTensor; +using ICLImage = ICLTensor; + +/** Basic function to run @ref CLColorConvertKernel + * + * @note The function performs color conversion between images. + */ +class CLColorConvert : public ICLSimpleFunction +{ +public: + /** Initialize the function's source, destination + * + * @param[in] input The input single-planar tensor from which to convert + * @param[out] output The converted single-planar output tensor + */ + void configure(const ICLTensor *input, ICLTensor *output); + /** Initialize the function's source, destination + * + * @param[in] input The multi-planar input image from which to convert + * @param[out] output The converted single-planar output image + */ + void configure(const ICLMultiImage *input, ICLImage *output); + /** Initialize the function's source, destination + * + * @param[in] input The single-planar input image from which to convert + * @param[out] output The converted multi-planar output image + */ + void configure(const ICLImage *input, ICLMultiImage *output); + /** Initialize the function's source, destination + * + * @param[in] input The multi-planar input image from which to convert + * @param[out] output The converted multi-planar output image + */ + void configure(const ICLMultiImage *input, ICLMultiImage *output); +}; +} +#endif /* __ARM_COMPUTE_CLCOLORCONVERT_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLConvolution.h b/arm_compute/runtime/CL/functions/CLConvolution.h new file mode 100644 index 0000000000..f526f6ff4a --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLConvolution.h @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +#ifndef __ARM_COMPUTE_CLCONVOLUTION_H__ +#define __ARM_COMPUTE_CLCONVOLUTION_H__ + +#include "arm_compute/core/CL/kernels/CLConvolutionKernel.h" +#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" +#include "arm_compute/runtime/IFunction.h" + +#include <cstdint> + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to execute convolution of size 3x3. This function calls the following OpenCL kernels: + * + * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref CLConvolution3x3Kernel + * + */ +class CLConvolution3x3 : public ICLSimpleFunction +{ +public: + /** Initialize the function's source, destination, conv and border_mode. + * + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor. Data types supported: U8 or S16. + * @param[in] conv matrix_size x matrix_size S16 coefficients structured as a row-major 2D array in a linear buffer. + * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0); +}; + +/** Basic function to execute square convolution. Currently it supports 5x5, 7x7, 9x9. This function calls the following OpenCL kernels: + * + * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref CLConvolutionKernel or
+ * @ref CLSeparableConvolutionHorKernel and @ref CLSeparableConvolutionVertKernel (if convolution matrix is separable) + * + */ +template <unsigned int matrix_size> +class CLConvolutionSquare : public IFunction +{ +public: + /** Default constructor */ + CLConvolutionSquare(); + /** Initialize the function's source, destination, conv and border_mode. + * + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor. Data types supported: U8 or S16. + * @param[in] conv matrix_size x matrix_size S16 coefficients structured as a row-major 2D array in a linear buffer. + * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0); + + // Inherited methods overridden: + void run() override; + +private: + CLTensor _tmp; /**< temporary buffer for output of horizontal pass */ + bool _is_separable; /**< true if the convolution can be separated */ + CLSeparableConvolutionHorKernel<matrix_size> _kernel_hor; /**< kernel for horizontal pass of separated convolution */ + CLSeparableConvolutionVertKernel<matrix_size> _kernel_vert; /**< kernel for vertical pass of separated convolution */ + CLConvolutionKernel<matrix_size> _kernel; /**< kernel for non-separated convolution */ + CLFillBorderKernel _border_handler; /**< kernel for border handling */ +}; + +/** Basic function to run 5x5 convolution. */ +using CLConvolution5x5 = CLConvolutionSquare<5>; +/** Basic function to run 7x7 convolution. */ +using CLConvolution7x7 = CLConvolutionSquare<7>; +/** Basic function to run 9x9 convolution. */ +using CLConvolution9x9 = CLConvolutionSquare<9>;
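Illustrative usage sketch (not part of the patch): a 5x5 convolution with a separable coefficient matrix, so the function can split it into a horizontal and a vertical pass. The coefficients (the outer product of [1 4 6 4 1] with itself) are hypothetical.

```cpp
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/functions/CLConvolution.h"

#include <cstdint>

using namespace arm_compute;

void convolution_sketch(ICLTensor *input, ICLTensor *output)
{
    // 5x5 separable kernel, row-major in a linear buffer
    static const int16_t conv[25] = {
        1,  4,  6,  4, 1,
        4, 16, 24, 16, 4,
        6, 24, 36, 24, 6,
        4, 16, 24, 16, 4,
        1,  4,  6,  4, 1
    };

    CLConvolution5x5 conv5x5;
    // scale 0: use the sum of the coefficients as the normalisation factor
    conv5x5.configure(input, output, conv, 0, BorderMode::CONSTANT, 0);
    conv5x5.run();
}
```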
+ +/** Basic function to execute non-square convolution. This function calls the following CL kernels: + * + * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref CLConvolutionRectangleKernel + * + * @note The convolution rectangle's rows and columns must each be 3, 5, 7 or 9. + */ +class CLConvolutionRectangle : public ICLSimpleFunction +{ +public: + /** Initialize the function's source, destination, conv and border_mode. + * + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor. Data types supported: U8 or S16. + * @param[in] conv Matrix_size x matrix_size S16 coefficients structured as a row-major 2D array in a linear buffer. + * @param[in] rows Rows of convolution kernel. + * @param[in] cols Columns of convolution kernel. + * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t rows, uint32_t cols, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0); +}; +} +#endif /*__ARM_COMPUTE_CLCONVOLUTION_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h new file mode 100644 index 0000000000..6a40396f9a --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLCONVOLUTIONLAYER_H__ +#define __ARM_COMPUTE_CLCONVOLUTIONLAYER_H__ + +#include "arm_compute/runtime/IFunction.h" + +#include "arm_compute/core/CL/kernels/CLCol2ImKernel.h" +#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" +#include "arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h" +#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h" +#include "arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h" +#include "arm_compute/core/CL/kernels/CLIm2ColKernel.h" +#include "arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLTensor.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Function to reshape and transpose the weights.
This function calls the following kernels: + * -# @ref CLConvolutionLayerWeightsReshapeKernel + * -# @ref CLGEMMTranspose1xWKernel + */ +class CLConvolutionLayerReshapeWeights : public IFunction +{ +public: + /** Constructor */ + CLConvolutionLayerReshapeWeights(); + /** Set the input and output tensors. + * + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: F32. + * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights. + * @param[out] output Destination tensor. Data types supported: Same as @p weights. + * @param[in] transpose1xW True if the weights are to undergo a 1xW transposition after reshaping (in case of GEMM operation), false otherwise. + */ + void configure(const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, bool transpose1xW); + // Inherited methods overridden: + void run() override; + +private: + CLConvolutionLayerWeightsReshapeKernel _weights_reshape_kernel; + CLGEMMTranspose1xWKernel _weights_transposed_kernel; + CLTensor _weights_reshaped; + bool _transpose1xW; +}; + +/** Basic function to compute the convolution layer. This function calls the following OpenCL kernels: + * + * -# @ref CLConvolutionLayerWeightsReshapeKernel (executed only once for each configuration) + * -# @ref CLGEMMTranspose1xWKernel (executed only once for each configuration) + * -# @ref CLIm2ColKernel + * -# @ref CLGEMMInterleave4x4Kernel + * -# @ref CLGEMMMatrixMultiplyKernel + * -# @ref CLCol2ImKernel + */ +class CLConvolutionLayer : public IFunction +{ +public: + /** Default constructor */ + CLConvolutionLayer(); + /** Set the input and output tensors. + * + * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. + * Data types supported: F16, F32. + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input. + * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. + * Data types supported: Same as @p input. + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] weights_info Specifies if the weights tensor has been reshaped with @ref CLConvolutionLayerWeightsReshapeKernel. If this is not part of the fully connected layer the weights + * tensor has also been transposed with @ref CLGEMMTranspose1xWKernel. Data type supported: Same as @p input.
+ */
+ void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo());
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ CLConvolutionLayerReshapeWeights _reshape_weights;
+ CLIm2ColKernel _input_im2col_kernel;
+ CLGEMMInterleave4x4Kernel _input_interleave_kernel;
+ CLGEMMMatrixMultiplyKernel _mm_kernel;
+ CLCol2ImKernel _output_col2im_kernel;
+ CLTensor _input_im2col_reshaped;
+ CLTensor _input_interleaved_reshaped;
+ CLTensor _weights_reshaped;
+ CLTensor _weights_transposed;
+ CLTensor _gemm_output;
+ bool _has_bias;
+ bool _is_fully_connected_convolution;
+ bool _are_weights_reshaped;
+};
+}
+#endif /* __ARM_COMPUTE_CLCONVOLUTIONLAYER_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLDepthConcatenate.h b/arm_compute/runtime/CL/functions/CLDepthConcatenate.h
new file mode 100644
index 0000000000..3199936b82
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLDepthConcatenate.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLDEPTHCONCATENATE_H__
+#define __ARM_COMPUTE_CLDEPTHCONCATENATE_H__
+
+#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Window.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
+#include <vector>
+
+namespace arm_compute
+{
+class ICLTensor;
+class CLDepthConcatenateKernel;
+class CLFillBorderKernel;
+
+/** Basic function to concatenate tensors along the z axis. This function calls the following kernels:
+ *
+ * -# @ref CLFillBorderKernel (executed if input's lowest two dimensions are smaller than respective output's dimensions)
+ * -# @ref CLDepthConcatenateKernel
+ *
+ */
+class CLDepthConcatenate : public IFunction
+{
+public:
+ /** Default constructor */
+ CLDepthConcatenate();
+ /** Initialise the kernel's inputs vector and output.
+ *
+ * @param[in,out] inputs_vector The vector containing all the tensors to concatenate. Data types supported: F32.
+ * @param[out] output Output tensor. Data types supported: F32.
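+ *
+ * A minimal sketch (illustrative only: tensor creation and shapes are assumptions; the inputs must share width/height and the output depth must cover the summed input depths):
+ * @code
+ * // t0, t1 and dst are pre-initialised F32 CLTensors, e.g. 32x32x8, 32x32x8 and 32x32x16.
+ * CLDepthConcatenate concat;
+ * std::vector<ICLTensor *> inputs = { &t0, &t1 };
+ * concat.configure(inputs, &dst);
+ * // ... allocate and fill the tensors, then:
+ * concat.run();
+ * @endcode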
+ */
+ void configure(std::vector<ICLTensor *> inputs_vector, ICLTensor *output);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ std::vector<ICLTensor *> _inputs_vector;
+ std::unique_ptr<CLDepthConcatenateKernel[]> _concat_kernels_vector;
+ std::unique_ptr<CLFillBorderKernel[]> _border_handlers_vector;
+ unsigned int _num_inputs;
+};
+}
+#endif /* __ARM_COMPUTE_CLDEPTHCONCATENATE_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLDepthConvert.h b/arm_compute/runtime/CL/functions/CLDepthConvert.h
new file mode 100644
index 0000000000..f11027656d
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLDepthConvert.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLDEPTHCONVERT_H__
+#define __ARM_COMPUTE_CLDEPTHCONVERT_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLDepthConvertKernel. */
+class CLDepthConvert : public ICLSimpleFunction
+{
+public:
+ /** Initialize the function's source and destination.
+ *
+ * The input data type must be different from the output data type.
+ *
+ * Valid conversions Input -> Output:
+ *
+ * - U8 -> U16, S16, U32, S32
+ * - U16 -> U8, U32, S32
+ * - S16 -> U8, U32, S32
+ * - U32 -> U8, U16, S16
+ * - S32 -> U8, U16, S16
+ *
+ * @param[in] input The input tensor to convert. Data types supported: U8, U16, S16, U32 or S32.
+ * @param[out] output The output tensor. Data types supported: U8, U16, S16, U32 or S32.
+ * @param[in] policy Conversion policy.
+ * @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift);
+};
+}
+#endif /*__ARM_COMPUTE_CLDEPTHCONVERT_H__*/
diff --git a/arm_compute/runtime/CL/functions/CLDerivative.h b/arm_compute/runtime/CL/functions/CLDerivative.h
new file mode 100644
index 0000000000..05033e8172
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLDerivative.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLDERIVATIVE_H__ +#define __ARM_COMPUTE_CLDERIVATIVE_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to execute first order derivative operator. This function calls the following CL kernels: + * + * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref CLDerivativeKernel + * + */ +class CLDerivative : public ICLSimpleFunction +{ +public: + /** Initialise the function's source, destinations and border mode. + * + * @note At least one of output_x or output_y must be not NULL. + * + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output_x (optional) Destination tensor. Derivative along the X direction. Data types supported: S16. + * @param[out] output_y (optional) Destination tensor. Derivative along the Y direction. Data types supported: S16. + * @param[in] border_mode Border mode to use + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + * + */ + void configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0); +}; +} +#endif /* __ARM_COMPUTE_CLDERIVATIVE_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLDilate.h b/arm_compute/runtime/CL/functions/CLDilate.h new file mode 100644 index 0000000000..8534139c86 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLDilate.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLDILATE_H__ +#define __ARM_COMPUTE_CLDILATE_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to execute dilate. This function calls the following OpenCL kernels: +* +* -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) +* -# @ref CLDilateKernel +* +*/ +class CLDilate : public ICLSimpleFunction +{ +public: + /** Initialise the kernel's inputs, output and border mode. + * + * @param[in,out] input First tensor input. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Output tensor. Data types supported: U8. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value); +}; +} +#endif /*__ARM_COMPUTE_CLDILATE_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLEqualizeHistogram.h b/arm_compute/runtime/CL/functions/CLEqualizeHistogram.h new file mode 100644 index 0000000000..d7182756b5 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLEqualizeHistogram.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLEQUALIZEHISTOGRAM_H__ +#define __ARM_COMPUTE_CLEQUALIZEHISTOGRAM_H__ + +#include "arm_compute/core/CL/kernels/CLHistogramKernel.h" +#include "arm_compute/core/CL/kernels/CLTableLookupKernel.h" +#include "arm_compute/runtime/CL/CLDistribution1D.h" +#include "arm_compute/runtime/CL/CLLut.h" +#include "arm_compute/runtime/IFunction.h" + +#include + +namespace arm_compute +{ +class ICLTensor; +using ICLImage = ICLTensor; + +/** Basic function to execute histogram equalization. 
This function calls the following CL kernels:
+ *
+ * -# @ref CLHistogramKernel
+ * -# @ref CLTableLookupKernel
+ *
+ */
+class CLEqualizeHistogram : public IFunction
+{
+public:
+ /** Default constructor. */
+ CLEqualizeHistogram();
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] input Input image. Data types supported: U8.
+ * @param[out] output Output of same data type with equalized brightness and contrast.
+ */
+ void configure(const ICLImage *input, ICLImage *output);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ CLHistogramKernel _histogram_kernel; /**< Kernel that calculates the histogram of input. */
+ CLHistogramBorderKernel _border_histogram_kernel; /**< Kernel that calculates the histogram on the borders. */
+ CLTableLookupKernel _map_histogram_kernel; /**< Kernel that maps the input to output using the LUT. */
+ CLDistribution1D _hist; /**< Distribution that holds the histogram of the input image. */
+ CLDistribution1D _cum_dist; /**< Distribution that holds the cumulative distribution of the input histogram. */
+ CLLut _cd_lut; /**< Holds the equalization lookup table. */
+ static const uint32_t max_range = 256; /**< Histogram range of the internal histograms. */
+ static const uint32_t nr_bins = 256; /**< Histogram bins of the internal histograms. */
+};
+}
+#endif /*__ARM_COMPUTE_CLEQUALIZEHISTOGRAM_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLErode.h b/arm_compute/runtime/CL/functions/CLErode.h
new file mode 100644
index 0000000000..cd2f5516e2
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLErode.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLERODE_H__
+#define __ARM_COMPUTE_CLERODE_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to execute erode. This function calls the following OpenCL kernels:
+*
+* -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
+* -# @ref CLErodeKernel
+*
+*/
+class CLErode : public ICLSimpleFunction
+{
+public:
+ /** Initialise the kernel's inputs, output and border mode.
+ *
+ * @param[in,out] input First tensor input. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+ * @param[out] output Output tensor. Data types supported: U8.
+ * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value); +}; +} +#endif /*__ARM_COMPUTE_CLERODE_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLFastCorners.h b/arm_compute/runtime/CL/functions/CLFastCorners.h new file mode 100644 index 0000000000..79d82af462 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLFastCorners.h @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLFASTCORNERS_H__ +#define __ARM_COMPUTE_CLFASTCORNERS_H__ + +#include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/core/CL/kernels/CLFastCornersKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Window.h" +#include "arm_compute/runtime/CL/CLArray.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h" +#include "arm_compute/runtime/IFunction.h" + +#include + +namespace arm_compute +{ +class ICLTensor; +using ICLImage = ICLTensor; + +/** Basic function to execute fast corners. This function calls the following CL kernels: + * + * -# @ref CLFastCornersKernel + * -# @ref CLNonMaximaSuppression3x3Kernel (executed if nonmax_suppression == true) + * -# @ref CLCopyToArrayKernel + * + */ +class CLFastCorners : public IFunction +{ +public: + /** Constructor */ + CLFastCorners(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFastCorners(const CLFastCorners &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + const CLFastCorners &operator=(const CLFastCorners &) = delete; + /** Initialize the function's source, destination, conv and border_mode. + * + * @param[in] input Source image. Data types supported: U8. + * @param[in] threshold Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3. + * @param[in] nonmax_suppression If true, non-maximum suppression is applied to detected corners before being placed in the array. + * @param[out] corners Array of keypoints to store the results. 
+ * @param[in,out] num_corners Record number of corners in the array + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(const ICLImage *input, float threshold, bool nonmax_suppression, CLKeyPointArray *corners, unsigned int *num_corners, + BorderMode border_mode, uint8_t constant_border_value = 0); + // Inherited methods overridden: + void run() override; + +private: + CLFastCornersKernel _fast_corners_kernel; + CLNonMaximaSuppression3x3 _suppr_func; + CLCopyToArrayKernel _copy_array_kernel; + CLImage _output; + CLImage _suppr; + Window _win; + bool _non_max; + unsigned int *_num_corners; + cl::Buffer _num_buffer; + CLKeyPointArray *_corners; + uint8_t _constant_border_value; +}; +} +#endif /*__ARM_COMPUTE_CLFASTCORNERS_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLFillBorder.h b/arm_compute/runtime/CL/functions/CLFillBorder.h new file mode 100644 index 0000000000..b4855475c3 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLFillBorder.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLFILLBORDER_H__ +#define __ARM_COMPUTE_CLFILLBORDER_H__ + +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLFillBorderKernel */ +class CLFillBorder : public ICLSimpleFunction +{ +public: + /** Initialize the function + * + * @param[in,out] tensor Source tensor. Data types supported: U8, S16 + * @param[in] border_width The border width + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ICLTensor *tensor, unsigned int border_width, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue()); +}; +} +#endif /*__ARM_COMPUTE_FILLBORDER_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h new file mode 100644 index 0000000000..826f445bd8 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLFULLYCONNECTEDLAYER_H__ +#define __ARM_COMPUTE_CLFULLYCONNECTEDLAYER_H__ + +#include "arm_compute/runtime/IFunction.h" + +#include "arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h" +#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h" +#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h" +#include "arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h" +#include "arm_compute/core/CL/kernels/CLIm2ColKernel.h" +#include "arm_compute/core/CL/kernels/CLTransposeKernel.h" +#include "arm_compute/runtime/CL/CLTensor.h" + +namespace arm_compute +{ +/** Basic function to reshape the weights of Fully Connected layer with OpenCL. This function calls the following kernels: + * + * -# @ref CLTransposeKernel (if @p transpose_weights is set to true) + * -# @ref CLGEMMTranspose1xWKernel (if @p is_batched_fc_layer is set to true) + * + * @note The fully connected layer accepts "weights" tensors only with 2 dimensions. + */ +class CLFullyConnectedLayerReshapeWeights : public IFunction +{ +public: + /** Constructor */ + CLFullyConnectedLayerReshapeWeights(); + /** Set the input and output tensors. + * + * @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported: QS8/F32. + * @param[out] output Destination tensor. Data type supported: Same as @p input. + * @param[in] transpose_weights True if the weights must be transposed. Data types supported: Same as @p weights. + * @param[in] is_batched_fc_layer True if it is a batched fully connected layer + */ + void configure(const ICLTensor *input, ICLTensor *output, bool transpose_weights, bool is_batched_fc_layer); + + // Inherited methods overridden: + void run() override; + +private: + CLTransposeKernel _transpose_kernel; + CLGEMMTranspose1xWKernel _transpose1xW_kernel; + CLTensor _transpose_output; + bool _transpose_weights; + bool _is_batched_fc_layer; +}; + +/** Basic function to compute a Fully Connected layer on OpenCL. 
This function calls the following OpenCL kernels: + * + * -# @ref CLIm2ColKernel (called when the input comes from a convolutional layer) + * -# @ref CLFullyConnectedLayerReshapeWeights (if @p are_weights_reshaped is set to false) (called once) + * -# @ref CLGEMMInterleave4x4Kernel (called if we have a multi-batch input) + * -# @ref CLGEMMMatrixMultiplyKernel + * -# @ref CLGEMMMatrixAccumulateBiasesKernel (if @p biases is not equal to nullptr) + * + * @note The fully connected layer accepts "weights" tensors only with 2 dimensions. + */ +class CLFullyConnectedLayer : public IFunction +{ +public: + /** Constructor */ + CLFullyConnectedLayer(); + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data type supported: F16/F32. + * @param[in] weights Weights tensor. The weights must be 2 dimensional. Data type supported: Same as @p input + * @param[in] biases Bias tensor. It can be nullptr. Data type supported:Same as @p input. + * @param[out] output Destination tensor. Data type supported: Same as @p input. + * @param[in] transpose_weights (Optional) Transpose weights if true. Defaults to true. + * @param[in] are_weights_reshaped (Optional) Reshape the weights tensor if false. Defaults to false. + */ + void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, bool transpose_weights = true, bool are_weights_reshaped = false); + + //Inherited methods override + void run() override; + +private: + void configure_fc_fc_wb(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output); + void configure_fc_fc_nb(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output); + void configure_conv_fc_wb(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output); + void configure_conv_fc_nb(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output); + + CLIm2ColKernel _im2col_kernel; + CLFullyConnectedLayerReshapeWeights _reshape_weights_kernel; + CLGEMMInterleave4x4Kernel _interleave4x4_kernel; + CLGEMMMatrixMultiplyKernel _mm_kernel; + CLGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel; + CLTensor _im2col_output; + CLTensor _interleave4x4_output; + CLTensor _reshape_weights_output; + bool _are_weights_reshaped; + bool _is_fc_after_conv; + bool _is_batched_fc_layer; + bool _accumulate_biases; +}; +} +#endif /* __ARM_COMPUTE_CLFULLYCONNECTEDLAYER_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLGEMM.h b/arm_compute/runtime/CL/functions/CLGEMM.h new file mode 100644 index 0000000000..043b2b8115 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLGEMM.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLGEMM_H__ +#define __ARM_COMPUTE_CLGEMM_H__ + +#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" +#include "arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h" +#include "arm_compute/core/CL/kernels/CLGEMMMatrixAdditionKernel.h" +#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h" +#include "arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/IFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to execute GEMM on OpenCL. Data types supported: F32, F16. This function calls the following OpenCL kernels: + * + * -# @ref CLGEMMInterleave4x4Kernel (if the output tensor is a matrix) + * -# @ref CLGEMMTranspose1xWKernel (if the output tensor is a matrix) + * -# @ref CLGEMMMatrixMultiplyKernel + * -# @ref CLGEMMMatrixAdditionKernel (if c != nullptr and beta != 0.0) + * + */ +class CLGEMM : public IFunction +{ +public: + /** Default constructor. */ + CLGEMM(); + /** Initialise the kernel's inputs and output + * + * @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C]. + * + * @note All tensors must have the same data type. Data types supported: F32, F16 + * + * @note Whilst the first input tensor can be a vector, the second input tensor must be at least a matrix + * + * @param[in] a First input tensor (Matrix or Vector A). Data types supported: F32, F16 + * @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a. + * @param[in] c Third input tensor (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a. + * @param[out] output Output tensor. Data type supported: same as @p a + * @param[in] alpha Weight of the matrix product + * @param[in] beta Weight of matrix C + */ + void configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta); + + // Inherited methods overridden: + void run() override; + +private: + CLGEMMInterleave4x4Kernel _interleave_kernel; + CLGEMMTranspose1xWKernel _transpose_kernel; + CLGEMMMatrixMultiplyKernel _mm_kernel; + CLGEMMMatrixAdditionKernel _ma_kernel; + CLTensor _tmp_a; + CLTensor _tmp_b; + bool _run_vector_matrix_multiplication; + bool _run_addition; +}; +} + +#endif /* __ARM_COMPUTE_CLGEMM_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLGEMMInterleave4x4.h b/arm_compute/runtime/CL/functions/CLGEMMInterleave4x4.h new file mode 100644 index 0000000000..b80136b328 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLGEMMInterleave4x4.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLGEMMINTERLEAVE4X4_H__ +#define __ARM_COMPUTE_CLGEMMINTERLEAVE4X4_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to execute CLGEMMInterleave4x4Kernel. This function calls the following OpenCL kernel: + * + * -# @ref CLGEMMInterleave4x4Kernel + * + */ +class CLGEMMInterleave4x4 : public ICLSimpleFunction +{ +public: + /** Initialise the kernel's inputs, output + * + * @param[in] input First input tensor. Data types supported: U8/S8/U16/S16/F16/U32/S32/F32 + * @param[out] output Output tensor. Data type supported: same as @p input + */ + void configure(const ICLTensor *input, ICLTensor *output); +}; +} + +#endif /* __ARM_COMPUTE_CLGEMMINTERLEAVE4X4_H__ */ \ No newline at end of file diff --git a/arm_compute/runtime/CL/functions/CLGEMMLowp.h b/arm_compute/runtime/CL/functions/CLGEMMLowp.h new file mode 100644 index 0000000000..da8883c3f8 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLGEMMLowp.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLGEMMLOWP_H__ +#define __ARM_COMPUTE_CLGEMMLOWP_H__ + +#include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/IFunction.h" + +#include "arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h" +#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h" +#include "arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to execute GEMMLowp on OpenCL. This function calls the following OpenCL kernels: +* +* -# @ref CLGEMMInterleave4x4Kernel +* -# @ref CLGEMMTranspose1xWKernel +* -# @ref CLGEMMLowpMatrixMultiplyKernel +* +*/ +class CLGEMMLowp : public IFunction +{ +public: + /** Constructor */ + CLGEMMLowp(); + /** Initialise the kernel's inputs, output + * + * @note GEMM_LOWP: low precision matrix multiply kernel + * This kernel performs the following computation: + * + * -# Convert a values from uint8 to int32 and add a_offset to each of them. + * -# Convert b values from uint8 to int32 and add b_offset to each of them. + * -# Compute the int32 matrix product of the resulting a * b. + * -# Add output_offset to each entry of the result. + * -# Multiply each entry of the result and round to the nearest integer + * -# Clamp the resulting int32 values to the [0..255] range and cast to uint8. + * + * @param[in] a First input tensor (Matrix A). Data types supported: U8. + * @param[in] b Second input tensor (Matrix B). Data types supported: same as @p a. + * @param[out] output Output tensor. Data types supported: same as @p a. + * @param[in] a_offset Offset to be added to each element of the matrix A. + * @param[in] b_offset Offset to be added to each element of the matrix B. + * @param[in] output_offset Offset to be added to each element of the output matrix + * @param[in] output_mult_int Multiplied with each element of the output matrix + * @param[in] shift Number of bits to shift right the result. + */ + void configure(const ICLTensor *a, const ICLTensor *b, ICLTensor *output, int32_t a_offset, int32_t b_offset, int32_t output_offset, int32_t output_mult_int, int32_t shift); + + // Inherited methods overridden: + void run() override; + +private: + CLGEMMInterleave4x4Kernel _interleave_kernel; + CLGEMMTranspose1xWKernel _transpose_kernel; + CLGEMMLowpMatrixMultiplyKernel _mm_kernel; + CLTensor _tmp_a; + CLTensor _tmp_b; +}; +} +#endif /*__ARM_COMPUTE_CLGEMMLOWP_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLGaussian3x3.h b/arm_compute/runtime/CL/functions/CLGaussian3x3.h new file mode 100644 index 0000000000..f8223bc5f5 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLGaussian3x3.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLGAUSSIAN3X3_H__ +#define __ARM_COMPUTE_CLGAUSSIAN3X3_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to execute gaussian filter 3x3. This function calls the following OpenCL kernels: + * + * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref CLGaussian3x3Kernel + * + */ +class CLGaussian3x3 : public ICLSimpleFunction +{ +public: + /** Initialise the function's source, destinations and border mode. + * + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor, Data types supported: U8. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0); +}; +} +#endif /*__ARM_COMPUTE_CLGAUSSIAN3X3_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLGaussian5x5.h b/arm_compute/runtime/CL/functions/CLGaussian5x5.h new file mode 100644 index 0000000000..148b9a9924 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLGaussian5x5.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLGAUSSIAN5X5_H__ +#define __ARM_COMPUTE_CLGAUSSIAN5X5_H__ + +#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" +#include "arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/IFunction.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to execute gaussian filter 5x5. 
This function calls the following OpenCL kernels: + * + * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref CLGaussian5x5HorKernel + * -# @ref CLGaussian5x5VertKernel + * + */ +class CLGaussian5x5 : public IFunction +{ +public: + /** Default Constructor. */ + CLGaussian5x5(); + /** Initialise the function's source, destinations and border mode. + * + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor, Data types supported: U8. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0); + + // Inherited methods overridden: + void run() override; + +protected: + CLGaussian5x5HorKernel _kernel_hor; /**< Horizontal pass kernel */ + CLGaussian5x5VertKernel _kernel_vert; /**< Vertical pass kernel */ + CLFillBorderKernel _border_handler; /**< Kernel to handle image borders */ + CLImage _tmp; /**< Temporary buffer */ +}; +} +#endif /*__ARM_COMPUTE_CLGAUSSIAN5X5_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLGaussianPyramid.h b/arm_compute/runtime/CL/functions/CLGaussianPyramid.h new file mode 100644 index 0000000000..97935193dc --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLGaussianPyramid.h @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLGAUSSIANPYRAMID_H__ +#define __ARM_COMPUTE_CLGAUSSIANPYRAMID_H__ + +#include "arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h" + +#include "arm_compute/core/CL/kernels/CLScaleKernel.h" +#include "arm_compute/core/IPyramid.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLPyramid.h" +#include "arm_compute/runtime/CL/functions/CLGaussian5x5.h" +#include "arm_compute/runtime/IFunction.h" + +#include +#include + +namespace arm_compute +{ +class ICLTensor; + +/** Common interface for all Gaussian pyramid functions + */ +class CLGaussianPyramid : public IFunction +{ +public: + /** Constructor */ + CLGaussianPyramid(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGaussianPyramid(const CLGaussianPyramid &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGaussianPyramid &operator=(const CLGaussianPyramid &) = delete; + /** Allow instances of this class to be moved */ + CLGaussianPyramid(CLGaussianPyramid &&) = default; + /** Allow instances of this class to be moved */ + CLGaussianPyramid &operator=(CLGaussianPyramid &&) = default; + /** Default destructor */ + virtual ~CLGaussianPyramid() = default; + /** Initialise the function's source, destinations and border mode. + * + * @param[in, out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] pyramid Destination pyramid tensors, Data types supported at each level: U8. + * @param[in] border_mode Border mode to use. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + * + */ + virtual void configure(ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value = 0) = 0; + +protected: + ICLTensor *_input; + CLPyramid *_pyramid; + CLPyramid _tmp; +}; + +/** Basic function to execute gaussian pyramid with HALF scale factor. This function calls the following OpenCL kernels: + * + * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref CLGaussianPyramidHorKernel + * -# @ref CLGaussianPyramidVertKernel + */ +class CLGaussianPyramidHalf : public CLGaussianPyramid +{ +public: + /** Constructor */ + CLGaussianPyramidHalf(); + + // Inherited methods overridden: + void configure(ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override; + void run() override; + +private: + std::unique_ptr _border_handler; + std::unique_ptr _horizontal_reduction; + std::unique_ptr _vertical_reduction; +}; + +/** Basic function to execute gaussian pyramid with ORB scale factor. 
This function calls the following OpenCL kernels and functions: + * + * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref CLGaussian5x5 + * -# @ref CLScaleKernel + */ +class CLGaussianPyramidOrb : public CLGaussianPyramid +{ +public: + /** Constructor */ + CLGaussianPyramidOrb(); + + // Inherited methods overridden: + void configure(ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override; + void run() override; + +private: + std::unique_ptr _gauss5x5; + std::unique_ptr _scale_nearest; +}; +} +#endif /*__ARM_COMPUTE_CLGAUSSIANPYRAMID_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLHOGDescriptor.h b/arm_compute/runtime/CL/functions/CLHOGDescriptor.h new file mode 100644 index 0000000000..cdb23bff33 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLHOGDescriptor.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLHOGDESCRIPTOR_H__ +#define __ARM_COMPUTE_CLHOGDESCRIPTOR_H__ + +#include "arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/functions/CLHOGGradient.h" +#include "arm_compute/runtime/IFunction.h" + +namespace arm_compute +{ +class IHOG; +/** Basic function to calculate HOG descriptor. This function calls the following OpenCL kernels: + * + * -# @ref CLHOGGradient + * -# @ref CLHOGOrientationBinningKernel + * -# @ref CLHOGBlockNormalizationKernel + * + */ +class CLHOGDescriptor : public IFunction +{ +public: + /** Default constructor */ + CLHOGDescriptor(); + /** Initialise the function's source, destination, HOG data-object and border mode + * + * @param[in, out] input Input tensor. Data type supported: U8 + * (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Output tensor which stores the HOG descriptor. DataType supported: F32. The number of channels is equal to the number of histogram bins per block + * @param[in] hog HOG data object which describes the HOG descriptor + * @param[in] border_mode Border mode to use. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
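+ *
+ * A minimal sketch (illustrative only: the HOGInfo parameter order and the tensor setup are assumptions, not guaranteed by this header):
+ * @code
+ * // Assumed HOGInfo layout: 8x8 cells, 16x16 blocks, 64x128 detection window, 8x8 block stride, 9 bins.
+ * CLHOG hog;
+ * hog.init(HOGInfo(Size2D(8, 8), Size2D(16, 16), Size2D(64, 128), Size2D(8, 8), 9));
+ * CLHOGDescriptor descriptor;
+ * // input: pre-initialised U8 CLTensor; output: F32 CLTensor sized for the HOG space.
+ * descriptor.configure(&input, &output, &hog, BorderMode::CONSTANT, 0);
+ * // ... allocate and fill the tensors, then:
+ * descriptor.run();
+ * @endcode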
+     */
+    void configure(ICLTensor *input, ICLTensor *output, const IHOG *hog, BorderMode border_mode, uint8_t constant_border_value = 0);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    CLHOGGradient                 _gradient;
+    CLHOGOrientationBinningKernel _orient_bin;
+    CLHOGBlockNormalizationKernel _block_norm;
+    CLTensor                      _mag;
+    CLTensor                      _phase;
+    CLTensor                      _hog_space;
+};
+}
+
+#endif /* __ARM_COMPUTE_CLHOGDESCRIPTOR_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLHOGDetector.h b/arm_compute/runtime/CL/functions/CLHOGDetector.h
new file mode 100644
index 0000000000..0b4fad7766
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLHOGDetector.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLHOGDETECTOR_H__
+#define __ARM_COMPUTE_CLHOGDETECTOR_H__
+
+#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/core/CL/kernels/CLHOGDetectorKernel.h"
+#include "arm_compute/core/IHOG.h"
+#include "arm_compute/runtime/IFunction.h"
+
+namespace arm_compute
+{
+/** Basic function to execute HOG detector based on linear SVM. This function calls the following OpenCL kernel:
+ *
+ * -# @ref CLHOGDetectorKernel
+ *
+ */
+class CLHOGDetector : public IFunction
+{
+public:
+    /** Default constructor */
+    CLHOGDetector();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLHOGDetector(const CLHOGDetector &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLHOGDetector &operator=(const CLHOGDetector &) = delete;
+    /** Allow instances of this class to be moved */
+    CLHOGDetector(CLHOGDetector &&) = default;
+    /** Allow instances of this class to be moved */
+    CLHOGDetector &operator=(CLHOGDetector &&) = default;
+    /** Default destructor */
+    ~CLHOGDetector() = default;
+    /** Initialise the kernel's input, output, HOG data object, detection window stride, threshold and class index
+     *
+     * @attention The function does not reset the number of values in @ref IDetectionWindowArray, so it is the caller's responsibility to clear it.
+     *
+     * @param[in]  input                   Input tensor. It is the output of @ref CLHOGDescriptor. Data type supported: F32
+     * @param[in]  hog                     HOG data-object that describes the HOG descriptor
+     * @param[out] detection_windows       Array of @ref DetectionWindow used to store the detected objects
+     * @param[in]  detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions.
+     *                                     It must be a multiple of the block stride stored in hog
+     * @param[in]  threshold               (Optional) Threshold for the distance between features and SVM classifying plane
+     * @param[in]  idx_class               (Optional) Index of the class used for evaluating which class the detection window belongs to
+     */
+    void configure(const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, const Size2D &detection_window_stride, float threshold = 0.0f, size_t idx_class = 0);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    CLHOGDetectorKernel      _hog_detector_kernel;
+    ICLDetectionWindowArray *_detection_windows;
+    cl::Buffer               _num_detection_windows;
+};
+}
+
+#endif /* __ARM_COMPUTE_CLHOGDETECTOR_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLHOGGradient.h b/arm_compute/runtime/CL/functions/CLHOGGradient.h
new file mode 100644
index 0000000000..e74a68497f
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLHOGGradient.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLHOGGRADIENT_H__
+#define __ARM_COMPUTE_CLHOGGRADIENT_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+#include "arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/functions/CLDerivative.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+/** Basic function to calculate the gradient for HOG. This function calls the following OpenCL kernels:
+ *
+ * -# @ref CLDerivative
+ * -# @ref CLMagnitudePhaseKernel
+ *
+ */
+class CLHOGGradient : public IFunction
+{
+public:
+    /** Default constructor */
+    CLHOGGradient();
+    /** Initialise the function's source, destinations, phase type and border mode
+     *
+     * @param[in, out] input                 Input tensor. Data type supported: U8.
+     *                                       (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]     output_magnitude      Output tensor (magnitude). Data type supported: U16.
+     * @param[out]     output_phase          Output tensor (phase). Data type supported: U8
+     * @param[in]      phase_type            Type of @ref PhaseType
+     * @param[in]      border_mode           Border mode to use
+     * @param[in]      constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(ICLTensor *input, ICLTensor *output_magnitude, ICLTensor *output_phase, PhaseType phase_type, BorderMode border_mode, uint8_t constant_border_value = 0);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    CLDerivative           _derivative;
+    CLMagnitudePhaseKernel _mag_phase;
+    CLTensor               _gx;
+    CLTensor               _gy;
+};
+}
+#endif /*__ARM_COMPUTE_CLHOGGRADIENT_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLHOGMultiDetection.h b/arm_compute/runtime/CL/functions/CLHOGMultiDetection.h
new file mode 100644
index 0000000000..3fe0fa932a
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLHOGMultiDetection.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLHOGMULTIDETECTION_H__
+#define __ARM_COMPUTE_CLHOGMULTIDETECTION_H__
+
+#include "arm_compute/core/CL/ICLArray.h"
+#include "arm_compute/core/CL/ICLMultiHOG.h"
+#include "arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h"
+#include "arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/functions/CLHOGDetector.h"
+#include "arm_compute/runtime/CL/functions/CLHOGGradient.h"
+#include "arm_compute/runtime/IFunction.h"
+
+namespace arm_compute
+{
+/** Basic function to detect multiple objects (or the same object at different scales) on the same input image using HOG.
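+ *
+ * A minimal usage sketch is shown below. The image contents, the CLMultiHOG set-up, the array
+ * capacities and the border mode are illustrative assumptions only, not values prescribed by this interface:
+ * @code
+ * CLTensor img;                         // U8 input image tensor, initialised and filled elsewhere
+ * CLMultiHOG multi_hog(2);              // container holding two HOG models, configured elsewhere
+ * CLDetectionWindowArray windows(1000); // detected objects end up here
+ * CLSize2DArray strides(2);             // one detection window stride per HOG model
+ * CLHOGMultiDetection hog_multi;
+ * hog_multi.configure(&img, &multi_hog, &windows, &strides, BorderMode::UNDEFINED);
+ * hog_multi.run();
+ * @endcode
+ *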
+ * This function calls the following kernels:
+ *
+ * -# @ref CLHOGGradient
+ * -# @ref CLHOGOrientationBinningKernel
+ * -# @ref CLHOGBlockNormalizationKernel
+ * -# @ref CLHOGDetector
+ * -# @ref CPPDetectionWindowNonMaximaSuppressionKernel (executed if non_maxima_suppression == true)
+ *
+ * @note This implementation works if all the HOG data-objects within the IMultiHOG container have the same:
+ *       -# Phase type
+ *       -# Normalization type
+ *       -# L2 hysteresis threshold if the normalization type is L2HYS_NORM
+ *
+ */
+class CLHOGMultiDetection : public IFunction
+{
+public:
+    /** Default constructor */
+    CLHOGMultiDetection();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLHOGMultiDetection(const CLHOGMultiDetection &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLHOGMultiDetection &operator=(const CLHOGMultiDetection &) = delete;
+    /** Initialise the function's source, destination, detection window strides, border mode, threshold and non-maxima suppression
+     *
+     * @param[in, out] input                    Input tensor. Data type supported: U8
+     *                                          (Written to only for @p border_mode != UNDEFINED)
+     * @param[in]      multi_hog                Container of multiple HOG data-objects. Each HOG data-object describes one HOG model to detect.
+     *                                          This container should store the HOG data-objects in descending or ascending cell_size width order.
+     *                                          This helps to determine whether the HOG descriptor computation can be skipped for some HOG data-objects
+     * @param[out]     detection_windows        Array of @ref DetectionWindow used for locating the detected objects
+     * @param[in]      detection_window_strides Array of @ref Size2D used to specify the distance in pixels between 2 consecutive detection windows in x and y directions for each HOG data-object
+     *                                          The dimension of this array must be the same as multi_hog->num_models()
+     *                                          The i-th detection_window_stride of this array must be a multiple of the block_stride stored in the i-th multi_hog array
+     * @param[in]      border_mode              Border mode to use.
+     * @param[in]      constant_border_value    (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     * @param[in]      threshold                (Optional) Threshold for the distance between features and SVM classifying plane
+     * @param[in]      non_maxima_suppression   (Optional) Flag to specify whether the non-maxima suppression is required or not.
+     *                                          True if the non-maxima suppression stage has to be computed
+     * @param[in]      min_distance             (Optional) Radial Euclidean distance to use for the non-maxima suppression stage
+     *
+     */
+    void configure(ICLTensor *input, const ICLMultiHOG *multi_hog, ICLDetectionWindowArray *detection_windows, ICLSize2DArray *detection_window_strides, BorderMode border_mode,
+                   uint8_t constant_border_value = 0,
+                   float threshold = 0.0f, bool non_maxima_suppression = false, float min_distance = 1.0f);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    CLHOGGradient                                                 _gradient_kernel;
+    std::unique_ptr<CLHOGOrientationBinningKernel[]>              _orient_bin_kernel;
+    std::unique_ptr<CLHOGBlockNormalizationKernel[]>              _block_norm_kernel;
+    std::unique_ptr<CLHOGDetector[]>                              _hog_detect_kernel;
+    std::unique_ptr<CPPDetectionWindowNonMaximaSuppressionKernel> _non_maxima_kernel;
+    std::unique_ptr<CLTensor[]>                                   _hog_space;
+    std::unique_ptr<CLTensor[]>                                   _hog_norm_space;
+    ICLDetectionWindowArray                                      *_detection_windows;
+    CLTensor                                                      _mag;
+    CLTensor                                                      _phase;
+    bool                                                          _non_maxima_suppression;
+    size_t                                                        _num_orient_bin_kernel;
+    size_t                                                        _num_block_norm_kernel;
+    size_t                                                        _num_hog_detect_kernel;
+};
+}
+
+#endif /* __ARM_COMPUTE_CLHOGMULTIDETECTION_H__ */
\ No newline at end of file
diff --git a/arm_compute/runtime/CL/functions/CLHarrisCorners.h b/arm_compute/runtime/CL/functions/CLHarrisCorners.h
new file mode 100644
index 0000000000..90da687435
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLHarrisCorners.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLHARRISCORNERS_H__
+#define __ARM_COMPUTE_CLHARRISCORNERS_H__
+
+#include "arm_compute/runtime/IFunction.h"
+
+#include "arm_compute/core/CL/ICLArray.h"
+#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
+#include "arm_compute/core/CL/kernels/CLHarrisCornersKernel.h"
+#include "arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h"
+#include "arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+
+#include <cstdint>
+
+#include <memory>
+
+namespace arm_compute
+{
+class ICLTensor;
+using ICLImage = ICLTensor;
+
+/** Basic function to execute Harris corners detection. This function calls the following CL and NEON kernels and functions:
+ *
+ * @note Requires CPU support for the kernels: CPPCornerCandidatesKernel and CPPSortEuclideanDistanceKernel.
+ *
+ * -# @ref CLSobel3x3 (if gradient_size == 3) or
+ *    @ref CLSobel5x5 (if gradient_size == 5) or
+ *    @ref CLSobel7x7 (if gradient_size == 7)
+ * -# @ref CLFillBorderKernel
+ * -# @ref CLHarrisScoreKernel
+ * -# @ref CLNonMaximaSuppression3x3
+ * -# @ref CPPCornerCandidatesKernel
+ * -# @ref CPPSortEuclideanDistanceKernel
+ */
+class CLHarrisCorners : public IFunction
+{
+public:
+    /** Constructor */
+    CLHarrisCorners();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLHarrisCorners(const CLHarrisCorners &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    const CLHarrisCorners &operator=(const CLHarrisCorners &) = delete;
+    /** Initialize the function's source, destination, parameters and border mode.
+     *
+     * @param[in,out] input                 Source image. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[in]     threshold             Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel).
+     * @param[in]     min_dist              Radial Euclidean distance for the Euclidean distance stage.
+     * @param[in]     sensitivity           Sensitivity threshold k from the Harris-Stephens equation
+     * @param[in]     gradient_size         The gradient window size to use on the input. The implementation supports 3, 5, and 7
+     * @param[in]     block_size            The block window size used to compute the Harris Corner score. The implementation supports 3, 5, and 7.
+     * @param[out]    corners               Array of keypoints to store the results.
+     * @param[in]     border_mode           Border mode to use
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(ICLImage *input, float threshold, float min_dist, float sensitivity,
+                   int32_t gradient_size, int32_t block_size, ICLKeyPointArray *corners,
+                   BorderMode border_mode, uint8_t constant_border_value = 0);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    std::unique_ptr<IFunction>          _sobel;                 /**< Sobel function */
+    CLHarrisScoreKernel                 _harris_score;          /**< Harris score kernel */
+    CLNonMaximaSuppression3x3Kernel     _non_max_suppr;         /**< Non-maxima suppression kernel */
+    CPPCornerCandidatesKernel           _candidates;            /**< Corner candidates kernel */
+    CPPSortEuclideanDistanceKernel      _sort_euclidean;        /**< Euclidean distance kernel */
+    CLFillBorderKernel                  _border_gx;             /**< Border handler before running harris score */
+    CLFillBorderKernel                  _border_gy;             /**< Border handler before running harris score */
+    CLImage                             _gx;                    /**< Source image - Gx component */
+    CLImage                             _gy;                    /**< Source image - Gy component */
+    CLImage                             _score;                 /**< Source image - Harris score */
+    CLImage                             _nonmax;                /**< Source image - Non-Maxima suppressed image */
+    std::unique_ptr<InternalKeypoint[]> _corners_list;          /**< Array of InternalKeypoint. It stores the potential corner candidates */
+    int32_t                             _num_corner_candidates; /**< Number of potential corner candidates */
+    ICLKeyPointArray                   *_corners;               /**< Output corners array */
+};
+}
+#endif /*__ARM_COMPUTE_CLHARRISCORNERS_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLHistogram.h b/arm_compute/runtime/CL/functions/CLHistogram.h
new file mode 100644
index 0000000000..455b61812d
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLHistogram.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLHISTOGRAM_H__
+#define __ARM_COMPUTE_CLHISTOGRAM_H__
+
+#include "arm_compute/core/CL/kernels/CLHistogramKernel.h"
+#include "arm_compute/runtime/IFunction.h"
+
+namespace arm_compute
+{
+class ICLDistribution1D;
+class ICLTensor;
+using ICLImage = ICLTensor;
+
+/** Basic function to execute histogram. This function calls the following OpenCL kernels:
+ *
+ * -# @ref CLHistogramKernel
+ * -# @ref CLHistogramBorderKernel
+ *
+ */
+class CLHistogram : public IFunction
+{
+public:
+    /** Default constructor */
+    CLHistogram();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLHistogram(const CLHistogram &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    const CLHistogram &operator=(const CLHistogram &) = delete;
+    /** Initialize the function
+     *
+     * @param[in]  input  Source image. Data types supported: U8
+     * @param[out] output Output distribution.
+     */
+    void configure(const ICLImage *input, ICLDistribution1D *output);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    CLHistogramKernel       _kernel;        /**< Kernel to run */
+    CLHistogramBorderKernel _kernel_border; /**< Border kernel to run */
+};
+}
+#endif /*__ARM_COMPUTE_CLHISTOGRAM_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLIntegralImage.h b/arm_compute/runtime/CL/functions/CLIntegralImage.h
new file mode 100644
index 0000000000..25fc549b29
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLIntegralImage.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLINTEGRALIMAGE_H__
+#define __ARM_COMPUTE_CLINTEGRALIMAGE_H__
+
+#include "arm_compute/core/CL/kernels/CLIntegralImageKernel.h"
+#include "arm_compute/runtime/IFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to execute integral image. This function calls the following OpenCL kernels:
+ *
+ * -# @ref CLIntegralImageHorKernel
+ * -# @ref CLIntegralImageVertKernel
+ *
+ */
+class CLIntegralImage : public IFunction
+{
+public:
+    /** Default Constructor. */
+    CLIntegralImage();
+    /** Initialise the function's source and destination.
+     *
+     * @param[in]  input  Source tensor. Data types supported: U8.
+     * @param[out] output Destination tensor. Data types supported: U32.
+     */
+    void configure(const ICLTensor *input, ICLTensor *output);
+
+    // Inherited methods overridden:
+    void run() override;
+
+protected:
+    CLIntegralImageHorKernel  _integral_hor;  /**< Integral Image Horizontal kernel */
+    CLIntegralImageVertKernel _integral_vert; /**< Integral Image Vertical kernel */
+};
+}
+#endif /*__ARM_COMPUTE_CLINTEGRALIMAGE_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLLaplacianPyramid.h b/arm_compute/runtime/CL/functions/CLLaplacianPyramid.h
new file mode 100644
index 0000000000..0c6708aa73
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLLaplacianPyramid.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLLAPLACIANPYRAMID_H__
+#define __ARM_COMPUTE_CLLAPLACIANPYRAMID_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLPyramid.h"
+#include "arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h"
+#include "arm_compute/runtime/CL/functions/CLDepthConvert.h"
+#include "arm_compute/runtime/CL/functions/CLGaussian5x5.h"
+#include "arm_compute/runtime/CL/functions/CLGaussianPyramid.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to execute Laplacian pyramid. This function calls the following OpenCL kernels and functions:
+ *
+ * -# @ref CLGaussianPyramidHalf
+ * -# @ref CLGaussian5x5
+ * -# @ref CLArithmeticSubtraction
+ *
+ * First a Gaussian pyramid is created. Then, for each level i, the corresponding tensor I(i) is blurred with the Gaussian 5x5 filter, and then the
+ * difference between the two tensors is the corresponding level L(i) of the Laplacian pyramid.
+ * L(i) = I(i) - Gaussian5x5(I(i))
+ * Level 0 always has the same first two dimensions as the input tensor.
+*/
+class CLLaplacianPyramid : public IFunction
+{
+public:
+    /** Constructor */
+    CLLaplacianPyramid();
+    /** Initialise the function's source, destinations and border mode.
+     *
+     * @param[in]  input                 Source tensor. Data types supported: U8.
+     * @param[out] pyramid               Destination pyramid tensors. Data types supported at each level: S16.
+     * @param[out] output                The lowest resolution tensor necessary to reconstruct the input tensor from the pyramid. Data types supported: S16.
+     *                                   The first two dimensions of this tensor must match the first two dimensions of the tensor in the last level of the pyramid, that is:
+     *                                   output.width() = input.width() / pow(2, pyramid_levels - 1) and output.height() = input.height() / pow(2, pyramid_levels - 1)
+     * @param[in]  border_mode           Border mode to use.
+     * @param[in]  constant_border_value Constant value to use for borders if border_mode is set to CONSTANT.
+     *
+     */
+    void configure(ICLTensor *input, CLPyramid *pyramid, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    size_t                                     _num_levels;
+    CLGaussianPyramidHalf                      _gaussian_pyr_function;
+    std::unique_ptr<CLGaussian5x5[]>           _convf;
+    std::unique_ptr<CLArithmeticSubtraction[]> _subf;
+    CLDepthConvert                             _depth_function;
+    CLPyramid                                  _gauss_pyr;
+    CLPyramid                                  _conv_pyr;
+};
+}
+#endif /*__ARM_COMPUTE_CLLAPLACIANPYRAMID_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLLaplacianReconstruct.h b/arm_compute/runtime/CL/functions/CLLaplacianReconstruct.h
new file mode 100644
index 0000000000..4bc7eb65ce
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLLaplacianReconstruct.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLLAPLACIANRECONSTRUCT_H__
+#define __ARM_COMPUTE_CLLAPLACIANRECONSTRUCT_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLPyramid.h"
+#include "arm_compute/runtime/CL/functions/CLArithmeticAddition.h"
+#include "arm_compute/runtime/CL/functions/CLDepthConvert.h"
+#include "arm_compute/runtime/CL/functions/CLScale.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <cstddef>
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+using ICLImage = ICLTensor;
+
+/** Basic function to execute Laplacian reconstruction. This function calls the following OpenCL kernels and functions:
+ *
+ * -# @ref CLArithmeticAddition
+ * -# @ref CLScale
+ * -# @ref CLDepthConvert
+ *
+ * This function reconstructs the original image from a Laplacian Image Pyramid.
+ *
+ * The input image is added to the last level of the Laplacian pyramid L(n-1); the resulting image is upsampled to the
+ * resolution of the next pyramid level.
+ *
+ * I(n-2) = upsample(input + L(n-1))
+ *
+ * For each pyramid level i, except i=0 and i=n-1:
+ *   I(i-1) = upsample(I(i) + L(i))
+ *
+ * output = I(0) + L(0)
+*/
+class CLLaplacianReconstruct : public IFunction
+{
+public:
+    /** Constructor */
+    CLLaplacianReconstruct();
+    /** Initialise the function's source, destinations and border mode.
+     *
+     * The Output image must have the same size as the first level of the pyramid.
+     * The Input image must have the same size as the last level of the pyramid.
+     *
+     * The idea is to reconstruct the original hi-res image from a low-res representation of it and the Laplacian pyramid.
+     *
+     * @param[in]  pyramid               Laplacian pyramid tensors. Data types supported at each level: S16.
+     * @param[in]  input                 Source tensor. Data types supported: S16.
+     * @param[out] output                Output tensor. Data types supported: U8.
+     * @param[in]  border_mode           Border mode to use for the convolution.
+     * @param[in]  constant_border_value Constant value to use for borders if border_mode is set to CONSTANT.
+     *
+     */
+    void configure(const CLPyramid *pyramid, const ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    CLPyramid                               _tmp_pyr;
+    std::unique_ptr<CLArithmeticAddition[]> _addf;
+    std::unique_ptr<CLScale[]>              _scalef;
+    CLDepthConvert                          _depthf;
+};
+}
+#endif /*__ARM_COMPUTE_CLLAPLACIANRECONSTRUCT_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h
new file mode 100644
index 0000000000..b4e469196e
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLLOCALLYCONNECTEDLAYER_H__
+#define __ARM_COMPUTE_CLLOCALLYCONNECTEDLAYER_H__
+
+#include "arm_compute/runtime/IFunction.h"
+
+#include "arm_compute/core/CL/kernels/CLCol2ImKernel.h"
+#include "arm_compute/core/CL/kernels/CLIm2ColKernel.h"
+#include "arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h"
+#include "arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to compute the locally connected layer. This function calls the following OpenCL kernels:
+ *
+ * -# @ref CLLocallyConnectedLayerWeightsReshapeKernel (executed only once for each configuration)
+ * -# @ref CLIm2ColKernel
+ * -# @ref CLLocallyConnectedMatrixMultiplyKernel
+ * -# @ref CLCol2ImKernel
+ */
+class CLLocallyConnectedLayer : public IFunction
+{
+public:
+    /** Default constructor */
+    CLLocallyConnectedLayer();
+    /** Set the input and output tensors.
+     *
+     * @param[in]  input     Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
+     *                       while every optional dimension from 4 and above represents a batch of inputs.
+     *                       Data types supported: F32.
+     * @param[in]  weights   Weights tensor. Weights are 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches]. Data type supported: Same as @p input.
+     * @param[in]  biases    Biases tensor. Shared biases supported. Biases are 2D tensor with dimensions [OFM, num_patches]. Data type supported: Same as @p input.
+     * @param[out] output    Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
+     *                       Data types supported: Same as @p input.
+     * @param[in]  conv_info Contains padding and stride information described in @ref PadStrideInfo.
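+     *
+     * A minimal configuration sketch. The tensor names, shapes and the allocation steps are
+     * illustrative assumptions only, not something prescribed by this interface:
+     * @code
+     * CLTensor src, weights, biases, dst; // F32 tensors, shaped as described above
+     * CLLocallyConnectedLayer lc_layer;
+     * lc_layer.configure(&src, &weights, &biases, &dst, PadStrideInfo(1, 1, 0, 0));
+     * // ... allocate and fill the tensors, then:
+     * lc_layer.run();
+     * @endcode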
+     */
+    void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    CLIm2ColKernel                              _input_im2col_kernel;
+    CLLocallyConnectedLayerWeightsReshapeKernel _weights_reshape_kernel;
+    CLLocallyConnectedMatrixMultiplyKernel      _mm_kernel;
+    CLCol2ImKernel                              _output_col2im_kernel;
+    CLTensor                                    _input_im2col_reshaped;
+    CLTensor                                    _weights_reshaped;
+    CLTensor                                    _gemm_output;
+    bool                                        _is_first_run;
+};
+}
+#endif /* __ARM_COMPUTE_CLLOCALLYCONNECTEDLAYER_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLMagnitude.h b/arm_compute/runtime/CL/functions/CLMagnitude.h
new file mode 100644
index 0000000000..dc5f9139b3
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLMagnitude.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLMAGNITUDE_H__
+#define __ARM_COMPUTE_CLMAGNITUDE_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLMagnitudePhaseKernel. */
+class CLMagnitude : public ICLSimpleFunction
+{
+public:
+    /** Initialise the kernel's inputs and output.
+     *
+     * @param[in]  input1   First tensor input. Data types supported: S16.
+     * @param[in]  input2   Second tensor input. Data types supported: S16.
+     * @param[out] output   Output tensor. Data types supported: S16.
+     * @param[in]  mag_type (Optional) Magnitude calculation type. Default: L2NORM.
+     */
+    void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, MagnitudeType mag_type = MagnitudeType::L2NORM);
+};
+}
+#endif /*__ARM_COMPUTE_CLMAGNITUDE_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLMeanStdDev.h b/arm_compute/runtime/CL/functions/CLMeanStdDev.h
new file mode 100644
index 0000000000..e33bcdd779
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLMeanStdDev.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLMEANSTDDEV_H__
+#define __ARM_COMPUTE_CLMEANSTDDEV_H__
+
+#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/core/CL/kernels/CLMeanStdDevKernel.h"
+#include "arm_compute/runtime/IFunction.h"
+
+namespace arm_compute
+{
+/** Basic function to execute mean and standard deviation by calling @ref CLMeanStdDevKernel */
+class CLMeanStdDev : public IFunction
+{
+public:
+    /** Default Constructor. */
+    CLMeanStdDev();
+    /** Initialise the kernel's inputs and outputs.
+     *
+     * @param[in]  input  Input image. Data types supported: U8.
+     * @param[out] mean   Output average pixel value.
+     * @param[out] stddev (Optional) Output standard deviation of pixel values.
+     */
+    void configure(const ICLImage *input, float *mean, float *stddev = nullptr);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    CLMeanStdDevKernel _mean_stddev_kernel; /**< Kernel that computes the mean and the standard deviation. */
+    cl::Buffer         _global_sum;         /**< Variable that holds the global sum among calls in order to ease reduction */
+    cl::Buffer         _global_sum_squared; /**< Variable that holds the global sum of squared values among calls in order to ease reduction */
+};
+}
+#endif /*__ARM_COMPUTE_CLMEANSTDDEV_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLMedian3x3.h b/arm_compute/runtime/CL/functions/CLMedian3x3.h
new file mode 100644
index 0000000000..af84ba7289
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLMedian3x3.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLMEDIAN3X3_H__
+#define __ARM_COMPUTE_CLMEDIAN3X3_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to execute median filter. This function calls the following OpenCL kernels:
+ *
+ * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
+ * -# @ref CLMedian3x3Kernel
+ *
+ */
+class CLMedian3x3 : public ICLSimpleFunction
+{
+public:
+    /** Initialise the function's source, destination and border mode.
+     *
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output                Destination tensor. Data types supported: U8.
+     * @param[in]     border_mode           Border mode to use for the convolution.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
+};
+}
+#endif /*__ARM_COMPUTE_CLMEDIAN3X3_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLMinMaxLocation.h b/arm_compute/runtime/CL/functions/CLMinMaxLocation.h
new file mode 100644
index 0000000000..84fd67515b
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLMinMaxLocation.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLMINMAXLOCATION_H__
+#define __ARM_COMPUTE_CLMINMAXLOCATION_H__
+
+#include "arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h"
+#include "arm_compute/runtime/CL/CLArray.h"
+#include "arm_compute/runtime/IFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+using ICLImage = ICLTensor;
+
+/** Basic function to execute min and max location.
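+ *
+ * A minimal usage sketch (the image set-up and the array capacities are illustrative assumptions):
+ * @code
+ * CLTensor img;                                  // U8 input image tensor, initialised and filled elsewhere
+ * int32_t min_val = 0, max_val = 0;
+ * CLCoordinates2DArray min_loc(32), max_loc(32); // capacity caps the number of reported locations
+ * CLMinMaxLocation mml;
+ * mml.configure(&img, &min_val, &max_val, &min_loc, &max_loc);
+ * mml.run();
+ * @endcode
+ *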
+ * This function calls the following OpenCL kernels:
+ *
+ * -# @ref CLMinMaxKernel
+ * -# @ref CLMinMaxLocationKernel
+ */
+class CLMinMaxLocation : public IFunction
+{
+public:
+    /** Constructor */
+    CLMinMaxLocation();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLMinMaxLocation(const CLMinMaxLocation &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLMinMaxLocation &operator=(const CLMinMaxLocation &) = delete;
+    /** Allow instances of this class to be moved */
+    CLMinMaxLocation(CLMinMaxLocation &&) = default;
+    /** Allow instances of this class to be moved */
+    CLMinMaxLocation &operator=(CLMinMaxLocation &&) = default;
+    /** Initialise the kernel's inputs and outputs.
+     *
+     * @note When locations of min and max occurrences are requested, the reported number of locations is limited to the given array size.
+     *
+     * @param[in]  input     Input image. Data types supported: U8 or S16.
+     * @param[out] min       Minimum value of image.
+     * @param[out] max       Maximum value of image.
+     * @param[out] min_loc   (Optional) Array of Coordinates2D used to store minimum value locations.
+     * @param[out] max_loc   (Optional) Array of Coordinates2D used to store maximum value locations.
+     * @param[out] min_count (Optional) Number of minimum value encounters.
+     * @param[out] max_count (Optional) Number of maximum value encounters.
+     */
+    void configure(const ICLImage *input, int32_t *min, int32_t *max,
+                   CLCoordinates2DArray *min_loc = nullptr, CLCoordinates2DArray *max_loc = nullptr,
+                   uint32_t *min_count = nullptr, uint32_t *max_count = nullptr);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    CLMinMaxKernel         _min_max_kernel;     /**< Kernel that performs min/max */
+    CLMinMaxLocationKernel _min_max_loc_kernel; /**< Kernel that counts min/max occurrences and identifies their positions */
+    cl::Buffer             _min_max_vals;       /**< Buffer to collect min, max values */
+    cl::Buffer             _min_max_count_vals; /**< Buffer to collect the number of min, max occurrences */
+    int32_t               *_min;                /**< Minimum value. */
+    int32_t               *_max;                /**< Maximum value. */
+    uint32_t              *_min_count;          /**< Minimum value occurrences. */
+    uint32_t              *_max_count;          /**< Maximum value occurrences. */
+    CLCoordinates2DArray  *_min_loc;            /**< Minimum value occurrences coordinates. */
+    CLCoordinates2DArray  *_max_loc;            /**< Maximum value occurrences coordinates. */
+};
+}
+#endif /*__ARM_COMPUTE_CLMINMAXLOCATION_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLNonLinearFilter.h b/arm_compute/runtime/CL/functions/CLNonLinearFilter.h
new file mode 100644
index 0000000000..9eee33e0ba
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLNonLinearFilter.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLNONLINEARFILTER_H__
+#define __ARM_COMPUTE_CLNONLINEARFILTER_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to execute non linear filter. This function calls the following OpenCL kernels:
+ *
+ * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
+ * -# @ref CLNonLinearFilterKernel
+ *
+ * @note Supported mask dimensions: squares of sizes 3 and 5
+ */
+class CLNonLinearFilter : public ICLSimpleFunction
+{
+public:
+    /** Initialize the function's source, destination, filter function, mask and border mode.
+     *
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output                Destination tensor. Data types supported: U8
+     * @param[in]     function              Non linear function to perform
+     * @param[in]     mask_size             Mask size. Supported sizes: 3, 5
+     * @param[in]     pattern               Mask pattern
+     * @param[in]     mask                  The given mask. Used only if pattern is set to PATTERN_OTHER
+     * @param[in]     border_mode           Strategy to use for borders.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask,
+                   BorderMode border_mode, uint8_t constant_border_value = 0);
+};
+}
+#endif /*__ARM_COMPUTE_CLNONLINEARFILTER_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h b/arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h
new file mode 100644
index 0000000000..7adced4313
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLNONMAXIMASUPPRESSION3X3_H__
+#define __ARM_COMPUTE_CLNONMAXIMASUPPRESSION3X3_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to execute non-maxima suppression over a 3x3 window. This function calls the following CL kernels:
+ *
+ * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
+ * -# @ref CLNonMaximaSuppression3x3Kernel
+ */
+class CLNonMaximaSuppression3x3 : public ICLSimpleFunction
+{
+public:
+    /** Initialise the function's source, destination and border mode.
+     *
+     * @note The implementation supports just 2 border modes: UNDEFINED and CONSTANT
+     *       The constant value used with CONSTANT border mode is 0
+     *
+     * @param[in,out] input       Source tensor. Data types supported: U8, F32. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output      Destination tensor for the non-maxima suppression 3x3. Data types supported: same as @p input.
+     * @param[in]     border_mode Border mode to use for non-maxima suppression.
+     *                            The implementation supports just 2 border modes: UNDEFINED and CONSTANT
+     */
+    void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode);
+};
+}
+#endif /* __ARM_COMPUTE_CLNONMAXIMASUPPRESSION3X3_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLNormalizationLayer.h
new file mode 100644
index 0000000000..a4dae85c1d
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLNormalizationLayer.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLNORMALIZATIONLAYER_H__
+#define __ARM_COMPUTE_CLNORMALIZATIONLAYER_H__
+
+#include "arm_compute/runtime/IFunction.h"
+
+#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
+#include "arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h"
+#include "arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to simulate a normalization layer.
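+ *
+ * A minimal usage sketch (the tensor shape, data type and the chosen NormType are illustrative assumptions):
+ * @code
+ * CLTensor src, dst; // e.g. F32 tensors of shape [width, height, IFM], initialised and allocated elsewhere
+ * CLNormalizationLayer norm;
+ * norm.configure(&src, &dst, NormalizationLayerInfo(NormType::CROSS_MAP, 5));
+ * norm.run();
+ * @endcode
+ *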
+ * This function calls the following CL kernels:
+ *
+ * -# @ref CLPixelWiseMultiplicationKernel
+ * -# @ref CLFillBorderKernel
+ * -# @ref CLNormalizationLayerKernel
+ *
+ */
+class CLNormalizationLayer : public IFunction
+{
+public:
+    /** Default constructor */
+    CLNormalizationLayer();
+    /** Set the input and output tensors.
+     *
+     * @param[in]  input     Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
+     *                       and an optional 4th dimension for batch of inputs. Data types supported: F16, F32. Number of channels must be 1.
+     * @param[out] output    Destination tensor. Dimensions, data type and number of channels must match the input ones.
+     * @param[in]  norm_info Normalization layer information like the normalization type, normalization size and other parameters.
+     */
+    void configure(const ICLTensor *input, ICLTensor *output, NormalizationLayerInfo norm_info);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    CLTensor                        _squared_input;   /**< The intermediate buffer which stores results of squaring input */
+    CLNormalizationLayerKernel      _norm_kernel;     /**< Normalization layer kernel to run */
+    CLPixelWiseMultiplicationKernel _multiply_kernel; /**< Pixel multiplication kernel to run */
+    CLFillBorderKernel              _border_handler;  /**< Kernel to handle borders */
+};
+}
+#endif /* __ARM_COMPUTE_CLNORMALIZATIONLAYER_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLOpticalFlow.h b/arm_compute/runtime/CL/functions/CLOpticalFlow.h
new file mode 100644
index 0000000000..ca3f86100e
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLOpticalFlow.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLOPTICALFLOW_H__
+#define __ARM_COMPUTE_CLOPTICALFLOW_H__
+
+#include "arm_compute/core/CL/kernels/CLLKTrackerKernel.h"
+
+#include "arm_compute/core/IArray.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLArray.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/functions/CLScharr3x3.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+
+namespace arm_compute
+{
+class CLPyramid;
+
+using CLLKInternalKeypointArray = CLArray<CLLKInternalKeypoint>;
+using CLCoefficientTableArray   = CLArray<CLCoefficientTable>;
+using CLOldValueArray           = CLArray<CLOldValue>;
+
+/** Basic function to execute optical flow.
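+ *
+ * A minimal usage sketch (the pyramids and keypoint arrays are assumed to be built and filled
+ * elsewhere; the termination settings shown are illustrative):
+ * @code
+ * CLPyramid old_pyr, new_pyr; // U8 pyramids of the previous and the current frame
+ * CLKeyPointArray old_pts(100), estimates(100), new_pts(100);
+ * CLOpticalFlow optflow;
+ * optflow.configure(&old_pyr, &new_pyr, &old_pts, &estimates, &new_pts,
+ *                   Termination::TERM_CRITERIA_BOTH, 0.01f, 10, 21, true, BorderMode::UNDEFINED);
+ * optflow.run();
+ * @endcode
+ *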
This function calls the following OpenCL kernels and functions: + * + * -# @ref CLScharr3x3 + * -# @ref CLLKTrackerInitKernel + * -# @ref CLLKTrackerStage0Kernel + * -# @ref CLLKTrackerStage1Kernel + * -# @ref CLLKTrackerFinalizeKernel + */ +class CLOpticalFlow : public IFunction +{ +public: + /** Default constructor */ + CLOpticalFlow(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLOpticalFlow(const CLOpticalFlow &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLOpticalFlow &operator=(const CLOpticalFlow &) = delete; + /** Allow instances of this class to be moved */ + CLOpticalFlow(CLOpticalFlow &&) = default; + /** Allow instances of this class to be moved */ + CLOpticalFlow &operator=(CLOpticalFlow &&) = default; + /** Initialise the function input and output + * + * @param[in] old_pyramid Pointer to the pyramid for the old tensor. Data types supported U8 + * @param[in] new_pyramid Pointer to the pyramid for the new tensor. Data types supported U8 + * @param[in] old_points Pointer to the IKeyPointArray storing old key points + * @param[in] new_points_estimates Pointer to the IKeyPointArray storing new estimates key points + * @param[out] new_points Pointer to the IKeyPointArray storing new key points + * @param[in] termination The criteria to terminate the search of each keypoint. + * @param[in] epsilon The error for terminating the algorithm + * @param[in] num_iterations The maximum number of iterations before terminate the alogrithm + * @param[in] window_dimension The size of the window on which to perform the algorithm + * @param[in] use_initial_estimate The flag to indicate whether the initial estimated position should be used + * @param[in] border_mode The border mode applied at scharr kernel stage + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT + * + */ + void configure(const CLPyramid *old_pyramid, const CLPyramid *new_pyramid, + const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates, ICLKeyPointArray *new_points, + Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, bool use_initial_estimate, + BorderMode border_mode, uint8_t constant_border_value = 0); + + // Inherited methods overridden: + void run() override; + +private: + std::unique_ptr _tracker_init_kernel; + std::unique_ptr _tracker_stage0_kernel; + std::unique_ptr _tracker_stage1_kernel; + CLLKTrackerFinalizeKernel _tracker_finalize_kernel; + std::unique_ptr _func_scharr; + std::unique_ptr _scharr_gx; + std::unique_ptr _scharr_gy; + const ICLKeyPointArray *_old_points; + const ICLKeyPointArray *_new_points_estimates; + ICLKeyPointArray *_new_points; + std::unique_ptr _old_points_internal; + std::unique_ptr _new_points_internal; + std::unique_ptr _coefficient_table; + std::unique_ptr _old_values; + size_t _num_levels; +}; +} +#endif /*__ARM_COMPUTE_CLOPTICALFLOW_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLPhase.h b/arm_compute/runtime/CL/functions/CLPhase.h new file mode 100644 index 0000000000..7cdfab16e2 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLPhase.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLPHASE_H__ +#define __ARM_COMPUTE_CLPHASE_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to execute an @ref CLMagnitudePhaseKernel. */ +class CLPhase : public ICLSimpleFunction +{ +public: + /** Initialise the kernel's inputs, output. + * + * @param[in] input1 First tensor input. Data types supported: S16. + * @param[in] input2 Second tensor input. Data types supported: S16. + * @param[out] output Output tensor. Data types supported: U8. + * @param[in] phase_type (Optional) Phase calculation type. Default: SIGNED. + */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, PhaseType phase_type = PhaseType::SIGNED); +}; +} +#endif /*__ARM_COMPUTE_CLPHASE_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h b/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h new file mode 100644 index 0000000000..71754fc3f4 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+     */
+    void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float scale,
+                   ConvertPolicy overflow_policy, RoundingPolicy rounding_policy);
+};
+}
+#endif /*__ARM_COMPUTE_CLPIXELWISEMULTIPLICATION_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLPoolingLayer.h b/arm_compute/runtime/CL/functions/CLPoolingLayer.h
new file mode 100644
index 0000000000..f92860e5b2
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLPoolingLayer.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLPOOLINGLAYER_H__
+#define __ARM_COMPUTE_CLPOOLINGLAYER_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to simulate a pooling layer with the specified pooling operation. This function calls the following OpenCL kernels:
+ *
+ * -# @ref CLFillBorderKernel (executed if padding size is different from zero)
+ * -# @ref CLPoolingLayerKernel
+ */
+class CLPoolingLayer : public ICLSimpleFunction
+{
+public:
+    /** Set the input and output tensors.
+     *
+     * @param[in,out] input     Source tensor. (Written to only when padding != 0) Data types supported: F16, F32.
+     * @param[out]    output    Destination tensor. Data types supported: Same as @p input.
+     * @param[in]     pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
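+     *
+     * A minimal usage sketch (tensor setup elided; pooling parameters are illustrative):
+     * @code
+     * CLPoolingLayer pool;
+     * // 2x2 max pooling with stride 2 and no padding
+     * pool.configure(&src, &dst, PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0)));
+     * pool.run();
+     * @endcode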
+     */
+    void configure(ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info);
+};
+}
+#endif /* __ARM_COMPUTE_CLPOOLINGLAYER_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLRemap.h b/arm_compute/runtime/CL/functions/CLRemap.h
new file mode 100644
index 0000000000..4cb2be90e7
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLRemap.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLREMAP_H__
+#define __ARM_COMPUTE_CLREMAP_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to execute remap. This function calls the following OpenCL kernels:
+ *
+ * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
+ * -# @ref CLRemapKernel
+ */
+class CLRemap : public ICLSimpleFunction
+{
+public:
+    /** Initialise the function's sources, destination, interpolation policy and border mode.
+     *
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[in]     map_x                 Map for X coords. Data types supported: F32.
+     * @param[in]     map_y                 Map for Y coords. Data types supported: F32.
+     * @param[out]    output                Output tensor. Data types supported: U8.
+     * @param[in]     policy                Interpolation policy to use. Only NEAREST and BILINEAR are supported.
+     * @param[in]     border_mode           Border mode to use on the input tensor.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
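+     *
+     * A minimal usage sketch (tensor setup elided; map_x and map_y hold, for every
+     * output pixel, the source coordinates to sample from):
+     * @code
+     * CLRemap remap;
+     * remap.configure(&src, &map_x, &map_y, &dst, InterpolationPolicy::NEAREST_NEIGHBOR, BorderMode::CONSTANT, 0);
+     * remap.run();
+     * @endcode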
+     *
+     */
+    void configure(ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output,
+                   InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0);
+};
+}
+#endif /*__ARM_COMPUTE_CLREMAP_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLScale.h b/arm_compute/runtime/CL/functions/CLScale.h
new file mode 100644
index 0000000000..c2438ddf9b
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLScale.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLSCALE_H__
+#define __ARM_COMPUTE_CLSCALE_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLScaleKernel */
+class CLScale : public ICLSimpleFunction
+{
+public:
+    /** Initialize the function's source, destination, interpolation type and border_mode.
+     *
+     * @param[in,out] input                 Source tensor. Data types supported: U8, S16. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output                Destination tensor. Data types supported: U8, S16 (Must be the same as the input tensor).
+     *                                      All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
+     * @param[in]     policy                The interpolation type.
+     * @param[in]     border_mode           Strategy to use for borders.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
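+     *
+     * A minimal usage sketch (tensor setup elided; the destination shape, e.g.
+     * half the source size, determines the scaling factor):
+     * @code
+     * CLScale scale;
+     * scale.configure(&src, &dst, InterpolationPolicy::BILINEAR, BorderMode::REPLICATE);
+     * scale.run();
+     * @endcode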
+     */
+    void configure(ICLTensor *input, ICLTensor *output, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0);
+};
+}
+#endif /*__ARM_COMPUTE_CLSCALE_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLScharr3x3.h b/arm_compute/runtime/CL/functions/CLScharr3x3.h
new file mode 100644
index 0000000000..3ea0b84624
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLScharr3x3.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLSCHARR3X3_H__
+#define __ARM_COMPUTE_CLSCHARR3X3_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to execute a Scharr 3x3 filter. This function calls the following OpenCL kernels:
+ *
+ * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
+ * -# @ref CLScharr3x3Kernel
+ *
+ */
+class CLScharr3x3 : public ICLSimpleFunction
+{
+public:
+    /** Initialise the function's source, destinations and border mode.
+     *
+     * @note At least one of output_x or output_y must not be NULL.
+     *
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output_x              (optional) Destination for the Scharr 3x3 convolution along the X axis. Data types supported: S16.
+     * @param[out]    output_y              (optional) Destination for the Scharr 3x3 convolution along the Y axis. Data types supported: S16.
+     * @param[in]     border_mode           Border mode to use for the convolution.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
+};
+}
+#endif /*__ARM_COMPUTE_CLSCHARR3X3_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLSobel3x3.h b/arm_compute/runtime/CL/functions/CLSobel3x3.h
new file mode 100644
index 0000000000..7a4f47d0ed
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLSobel3x3.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLSOBEL3X3_H__
+#define __ARM_COMPUTE_CLSOBEL3X3_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to execute a Sobel 3x3 filter. This function calls the following OpenCL kernels:
+ *
+ * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
+ * -# @ref CLSobel3x3Kernel
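+ *
+ * A minimal usage sketch (tensor setup elided; either gradient output may be
+ * nullptr if it is not needed):
+ * @code
+ * CLSobel3x3 sobel;
+ * sobel.configure(&src, &grad_x, &grad_y, BorderMode::REPLICATE);
+ * sobel.run();
+ * @endcode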
+ *
+ */
+class CLSobel3x3 : public ICLSimpleFunction
+{
+public:
+    /** Initialise the function's source, destinations and border mode.
+     *
+     * @note At least one of output_x or output_y must not be NULL.
+     *
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output_x              (optional) Destination for the Sobel 3x3 convolution along the X axis. Data types supported: S16.
+     * @param[out]    output_y              (optional) Destination for the Sobel 3x3 convolution along the Y axis. Data types supported: S16.
+     * @param[in]     border_mode           Border mode to use for the convolution.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
+};
+}
+#endif /*__ARM_COMPUTE_CLSOBEL3X3_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLSobel5x5.h b/arm_compute/runtime/CL/functions/CLSobel5x5.h
new file mode 100644
index 0000000000..ad1f72faf8
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLSobel5x5.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLSOBEL5X5_H__
+#define __ARM_COMPUTE_CLSOBEL5X5_H__
+
+#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
+#include "arm_compute/core/CL/kernels/CLSobel5x5Kernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to execute a Sobel 5x5 filter. This function calls the following OpenCL kernels:
+ *
+ * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
+ * -# @ref CLSobel5x5HorKernel
+ * -# @ref CLSobel5x5VertKernel
+ *
+ */
+class CLSobel5x5 : public IFunction
+{
+public:
+    /** Default Constructor. */
+    CLSobel5x5();
+    /** Initialise the function's source, destinations and border mode.
+     *
+     * @note At least one of output_x or output_y must not be NULL.
+     *
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output_x              (optional) Destination for the Sobel 5x5 convolution along the X axis. Data types supported: S16.
+     * @param[out]    output_y              (optional) Destination for the Sobel 5x5 convolution along the Y axis. Data types supported: S16.
+     * @param[in]     border_mode           Border mode to use for the convolution.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
+
+    // Inherited methods overridden:
+    void run() override;
+
+protected:
+    CLSobel5x5HorKernel  _sobel_hor;      /**< Sobel Horizontal 5x5 kernel */
+    CLSobel5x5VertKernel _sobel_vert;     /**< Sobel Vertical 5x5 kernel */
+    CLFillBorderKernel   _border_handler; /**< Kernel to handle image borders */
+    CLImage              _tmp_x;          /**< Temporary buffer for Sobel X */
+    CLImage              _tmp_y;          /**< Temporary buffer for Sobel Y */
+};
+}
+#endif /*__ARM_COMPUTE_CLSOBEL5X5_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLSobel7x7.h b/arm_compute/runtime/CL/functions/CLSobel7x7.h
new file mode 100644
index 0000000000..1a3fe1a50a
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLSobel7x7.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLSOBEL7X7_H__
+#define __ARM_COMPUTE_CLSOBEL7X7_H__
+
+#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
+#include "arm_compute/core/CL/kernels/CLSobel7x7Kernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to execute a Sobel 7x7 filter. This function calls the following OpenCL kernels:
+ *
+ * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
+ * -# @ref CLSobel7x7HorKernel
+ * -# @ref CLSobel7x7VertKernel
+ *
+ */
+class CLSobel7x7 : public IFunction
+{
+public:
+    /** Default Constructor. */
+    CLSobel7x7();
+    /** Initialise the function's source, destinations and border mode.
+     *
+     * @note At least one of output_x or output_y must not be NULL.
+     *
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output_x              (optional) Destination for the Sobel 7x7 convolution along the X axis. Data types supported: S32.
+     * @param[out]    output_y              (optional) Destination for the Sobel 7x7 convolution along the Y axis. Data types supported: S32.
+     * @param[in]     border_mode           Border mode to use for the convolution.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
+
+    // Inherited methods overridden:
+    void run() override;
+
+protected:
+    CLSobel7x7HorKernel  _sobel_hor;      /**< Sobel Horizontal 7x7 kernel */
+    CLSobel7x7VertKernel _sobel_vert;     /**< Sobel Vertical 7x7 kernel */
+    CLFillBorderKernel   _border_handler; /**< Kernel to handle image borders */
+    CLImage              _tmp_x;          /**< Temporary buffer for Sobel X */
+    CLImage              _tmp_y;          /**< Temporary buffer for Sobel Y */
+};
+}
+#endif /*__ARM_COMPUTE_CLSOBEL7X7_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h
new file mode 100644
index 0000000000..42cfc06fc4
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLSOFTMAXLAYER_H__
+#define __ARM_COMPUTE_CLSOFTMAXLAYER_H__
+
+#include "arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/IFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to compute a SoftmaxLayer.
+ *
+ * Softmax is calculated by:
+ * @f[ out = exp(x - max(x)) / sum(exp(x - max(x))) @f]
+ *
+ * This function runs the following kernels:
+ * -# @ref CLLogits1DMaxKernel
+ * -# @ref CLLogits1DShiftExpSumKernel
+ * -# @ref CLLogits1DNormKernel
+ */
+class CLSoftmaxLayer : public IFunction
+{
+public:
+    /** Constructor */
+    CLSoftmaxLayer();
+    /** Set the input and output tensors.
+     *
+     * @param[in]  input  Source tensor. Data types supported: F16, F32. Number of channels must be 1.
+     * @param[out] output Destination tensor. Matching input type and channel number.
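+     *
+     * A minimal usage sketch (tensor allocation elided; the shape is illustrative):
+     * @code
+     * CLTensor       logits, probs; // e.g. 1D tensors of 1000 F32 values
+     * CLSoftmaxLayer softmax;
+     * softmax.configure(&logits, &probs);
+     * softmax.run();
+     * @endcode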
+ */ + void configure(const ICLTensor *input, ICLTensor *output); + + // Inherited methods overridden: + void run() override; + +private: + CLLogits1DMaxKernel _max_kernel; + CLLogits1DShiftExpSumKernel _shift_exp_sum_kernel; + CLLogits1DNormKernel _norm_kernel; + CLTensor _max; + CLTensor _sum; + CLTensor _tmp; +}; +} +#endif /* __ARM_COMPUTE_CLSOFTMAXLAYER_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLTableLookup.h b/arm_compute/runtime/CL/functions/CLTableLookup.h new file mode 100644 index 0000000000..ebe6593b6a --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLTableLookup.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLTABLELOOKUP_H__ +#define __ARM_COMPUTE_CLTABLELOOKUP_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLTensor; +class ICLLut; + +/** Basic function to run @ref CLTableLookupKernel */ +class CLTableLookup : public ICLSimpleFunction +{ +public: + /** Initialise the kernel's inputs and output + * + * @param[in] input First tensor input. Data types supported: U8 and S16 + * @param[in] lut Input lookup table. Data types supported: U8 and S16 + * @param[out] output Output tensor. Data types supported: U8 and S16 + */ + void configure(const ICLTensor *input, const ICLLut *lut, ICLTensor *output); +}; +} +#endif /*__ARM_COMPUTE_CLTABLELOOKUP_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLThreshold.h b/arm_compute/runtime/CL/functions/CLThreshold.h new file mode 100644 index 0000000000..14c05786c1 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLThreshold.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLTHRESHOLD_H__
+#define __ARM_COMPUTE_CLTHRESHOLD_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLThresholdKernel */
+class CLThreshold : public ICLSimpleFunction
+{
+public:
+    /** Initialise the function's source, destination, thresholds and threshold type
+     *
+     * @param[in]  input       First tensor input. Data types supported: U8.
+     * @param[out] output      Output tensor. Data types supported: U8.
+     * @param[in]  threshold   Threshold. If upper threshold is specified, this will be used as the lower threshold.
+     * @param[in]  false_value Value to assign when the condition is false.
+     * @param[in]  true_value  Value to assign when the condition is true.
+     * @param[in]  type        Thresholding type. Can either be BINARY or RANGE.
+     * @param[in]  upper       Upper threshold. Only used with RANGE thresholding.
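+     *
+     * A minimal usage sketch (tensor setup elided; values are illustrative):
+     * @code
+     * CLThreshold thresh;
+     * // Binary thresholding: pixels above 127 become 255, all others 0
+     * thresh.configure(&src, &dst, 127, 0, 255, ThresholdType::BINARY);
+     * thresh.run();
+     * @endcode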
+     */
+    void configure(const ICLTensor *input, ICLTensor *output, uint8_t threshold,
+                   uint8_t false_value = 0, uint8_t true_value = 0,
+                   ThresholdType type = ThresholdType::BINARY, uint8_t upper = 0);
+};
+}
+#endif /*__ARM_COMPUTE_CLTHRESHOLD_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLTranspose.h b/arm_compute/runtime/CL/functions/CLTranspose.h
new file mode 100644
index 0000000000..9b57fe00a8
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLTranspose.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLTRANSPOSE_H__
+#define __ARM_COMPUTE_CLTRANSPOSE_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to transpose a matrix on OpenCL. This function calls the following OpenCL kernel:
+ *
+ * -# @ref CLTransposeKernel
+ *
+ */
+class CLTranspose : public ICLSimpleFunction
+{
+public:
+    /** Initialise the kernel's inputs and output
+     *
+     * @param[in]  input  Input tensor. Data types supported: U8/S8/U16/S16/F16/U32/S32/F32
+     * @param[out] output Output tensor. Data type supported: Same as @p input
+     */
+    void configure(const ICLTensor *input, ICLTensor *output);
+};
+}
+
+#endif /* __ARM_COMPUTE_CLTRANSPOSE_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLWarpAffine.h b/arm_compute/runtime/CL/functions/CLWarpAffine.h
new file mode 100644
index 0000000000..aeab3f7b22
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLWarpAffine.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLWARPAFFINE_H__
+#define __ARM_COMPUTE_CLWARPAFFINE_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLWarpAffineKernel for AFFINE transformation */
+class CLWarpAffine : public ICLSimpleFunction
+{
+public:
+    /** Initialize the function's source, destination, interpolation policy and border_mode.
+     *
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output                Destination tensor. Data types supported: U8.
+     * @param[in]     matrix                The affine matrix. Must be 2x3 of type float.
+     * @param[in]     policy                The interpolation type.
+     * @param[in]     border_mode           Strategy to use for borders.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
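+     *
+     * A minimal usage sketch (tensor setup elided; the matrix values are illustrative,
+     * here an identity transform plus a translation of (10, 20)):
+     * @code
+     * const float matrix[] = { 1.f, 0.f, 0.f, 1.f, 10.f, 20.f };
+     * CLWarpAffine warp;
+     * warp.configure(&src, &dst, matrix, InterpolationPolicy::NEAREST_NEIGHBOR, BorderMode::CONSTANT, 0);
+     * warp.run();
+     * @endcode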
+     */
+    void configure(ICLTensor *input, ICLTensor *output, const float *matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0);
+};
+}
+#endif /*__ARM_COMPUTE_CLWARPAFFINE_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLWarpPerspective.h b/arm_compute/runtime/CL/functions/CLWarpPerspective.h
new file mode 100644
index 0000000000..80237017aa
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLWarpPerspective.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLWARPPERSPECTIVE_H__
+#define __ARM_COMPUTE_CLWARPPERSPECTIVE_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLWarpPerspectiveKernel for PERSPECTIVE transformation */
+class CLWarpPerspective : public ICLSimpleFunction
+{
+public:
+    /** Initialize the function's source, destination, interpolation policy and border_mode.
+     *
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output                Destination tensor. Data types supported: U8.
+     * @param[in]     matrix                The perspective matrix. Must be 3x3 of type float.
+     * @param[in]     policy                The interpolation type.
+     * @param[in]     border_mode           Strategy to use for borders.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(ICLTensor *input, ICLTensor *output, const float *matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0);
+};
+}
+#endif /*__ARM_COMPUTE_CLWARPPERSPECTIVE_H__ */
diff --git a/arm_compute/runtime/CPP/CPPScheduler.h b/arm_compute/runtime/CPP/CPPScheduler.h
new file mode 100644
index 0000000000..7a37e5ec21
--- /dev/null
+++ b/arm_compute/runtime/CPP/CPPScheduler.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CPPSCHEDULER_H__
+#define __ARM_COMPUTE_CPPSCHEDULER_H__
+
+#include "arm_compute/runtime/IScheduler.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+class Thread;
+
+/** C++11 implementation of a pool of threads to automatically split a kernel's execution among several threads. */
+class CPPScheduler : public IScheduler
+{
+public:
+    /** Sets the number of threads the scheduler will use to run the kernels.
+     *
+     * @param[in] num_threads If set to 0, then the maximum number of threads supported by C++11 will be used, otherwise the number of threads specified.
+     */
+    void set_num_threads(unsigned int num_threads) override;
+    /** Returns the number of threads that the CPPScheduler has in its pool.
+     *
+     * @return Number of threads available in CPPScheduler.
+     */
+    unsigned int num_threads() const override;
+    /** Access the scheduler singleton
+     *
+     * @return The scheduler
+     */
+    static CPPScheduler &get();
+    /** Multithread the execution of the passed kernel if possible.
+     *
+     * The kernel will run on a single thread if any of these conditions is true:
+     * - ICPPKernel::is_parallelisable() returns false
+     * - The scheduler has been initialized with only one thread.
+     *
+     * @param[in] kernel          Kernel to execute.
+     * @param[in] split_dimension Dimension along which to split the kernel's execution window.
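+     *
+     * A rough usage sketch (kernel setup elided; my_kernel is illustrative):
+     * @code
+     * CPPScheduler::get().set_num_threads(4);                 // optional, 0 means "use the maximum"
+     * CPPScheduler::get().schedule(&my_kernel, Window::DimY); // split the execution window along Y
+     * @endcode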
+     */
+    void schedule(ICPPKernel *kernel, unsigned int split_dimension) override;
+
+private:
+    /** Constructor: create a pool of threads. */
+    CPPScheduler();
+
+    unsigned int              _num_threads;
+    std::unique_ptr<Thread[]> _threads;
+};
+}
+#endif /* __ARM_COMPUTE_CPPSCHEDULER_H__ */
diff --git a/arm_compute/runtime/Distribution1D.h b/arm_compute/runtime/Distribution1D.h
new file mode 100644
index 0000000000..7080e88075
--- /dev/null
+++ b/arm_compute/runtime/Distribution1D.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_DISTRIBUTION1D_H__
+#define __ARM_COMPUTE_DISTRIBUTION1D_H__
+
+#include "arm_compute/core/IDistribution1D.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+
+namespace arm_compute
+{
+/** Basic implementation of the 1D distribution interface */
+class Distribution1D : public IDistribution1D
+{
+public:
+    /** Constructor: Creates a 1D Distribution of a consecutive interval [offset, offset + range - 1]
+     * defined by a start offset and valid range, divided equally into num_bins parts.
+     *
+     * @param[in] num_bins The number of bins the distribution is divided into.
+     * @param[in] offset   The start of the values to use.
+     * @param[in] range    The total number of the consecutive values of the distribution interval.
+     */
+    Distribution1D(size_t num_bins, int32_t offset, uint32_t range);
+
+    // Inherited methods overridden:
+    uint32_t *buffer() const override;
+
+private:
+    std::unique_ptr<uint32_t[]> _data; /**< The distribution data. */
+};
+}
+#endif /* __ARM_COMPUTE_DISTRIBUTION1D_H__ */
diff --git a/arm_compute/runtime/HOG.h b/arm_compute/runtime/HOG.h
new file mode 100644
index 0000000000..70d8034bef
--- /dev/null
+++ b/arm_compute/runtime/HOG.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_HOG_H__
+#define __ARM_COMPUTE_HOG_H__
+
+#include "arm_compute/core/HOGInfo.h"
+#include "arm_compute/core/IHOG.h"
+#include "arm_compute/core/Types.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+/** CPU implementation of HOG data-object */
+class HOG : public IHOG
+{
+public:
+    /** Default constructor */
+    HOG();
+    /** Allocate the HOG descriptor using the given HOG's metadata
+     *
+     * @param[in] input HOG's metadata used to allocate the HOG descriptor
+     */
+    void init(const HOGInfo &input);
+
+    // Inherited methods overridden:
+    const HOGInfo *info() const override;
+    float *descriptor() const override;
+
+private:
+    HOGInfo                  _info;
+    std::unique_ptr<float[]> _descriptor;
+};
+}
+#endif /* __ARM_COMPUTE_HOG_H__ */
diff --git a/arm_compute/runtime/IFunction.h b/arm_compute/runtime/IFunction.h
new file mode 100644
index 0000000000..a4e7ed15e0
--- /dev/null
+++ b/arm_compute/runtime/IFunction.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_IFUNCTION_H__ +#define __ARM_COMPUTE_IFUNCTION_H__ + +namespace arm_compute +{ +/** Base class for all functions */ +class IFunction +{ +public: + /** Run the kernels contained in the function + * + * For NEON kernels: + * - Multi-threading is used for the kernels which are parallelisable. + * - By default std::thread::hardware_concurrency() threads are used. + * + * @note @ref CPPScheduler::set_num_threads() can be used to manually set the number of threads + * + * For OpenCL kernels: + * - All the kernels are enqueued on the queue associated with CLScheduler. + * - The queue is then flushed. + * + * @note The function will not block until the kernels are executed. It is the user's responsibility to wait. + */ + virtual void run() = 0; + /** Destructor + * + */ + virtual ~IFunction() = default; +}; +} +#endif /*__ARM_COMPUTE_IFUNCTION_H__ */ diff --git a/arm_compute/runtime/ILutAllocator.h b/arm_compute/runtime/ILutAllocator.h new file mode 100644 index 0000000000..f23fbd2154 --- /dev/null +++ b/arm_compute/runtime/ILutAllocator.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+#ifndef __ARM_COMPUTE_ILUTALLOCATOR_H__
+#define __ARM_COMPUTE_ILUTALLOCATOR_H__
+
+#include "arm_compute/core/Types.h"
+
+#include <cstddef>
+#include <cstdint>
+
+namespace arm_compute
+{
+/** Basic interface to allocate LUTs */
+class ILutAllocator
+{
+public:
+    /** Default constructor */
+    ILutAllocator();
+    /** Default virtual destructor */
+    virtual ~ILutAllocator() = default;
+    /** Allow instances of this class to be move constructed */
+    ILutAllocator(ILutAllocator &&) = default;
+    /** Allow instances of this class to be moved */
+    ILutAllocator &operator=(ILutAllocator &&) = default;
+    /** Allocate a LUT of the requested number of elements and data_type.
+     *
+     * @param[in] num_elements Number of elements of the LUT.
+     * @param[in] data_type    Data type of each element.
+     */
+    void init(size_t num_elements, DataType data_type);
+    /** Returns the total number of elements in the LUT.
+     *
+     * @return Total number of elements.
+     */
+    size_t num_elements() const;
+    /** Returns the type of the LUT.
+     *
+     * @return The type of the LUT.
+     */
+    DataType type() const;
+    /** Returns the total size in bytes of the LUT.
+     *
+     * @return Total size of the LUT in bytes.
+     */
+    size_t size() const;
+
+protected:
+    /** Interface to be implemented by the child class to allocate the LUT. */
+    virtual void allocate() = 0;
+    /** Interface to be implemented by the child class to lock the memory allocation for the CPU to access.
+     *
+     * @return Pointer to a CPU mapping of the memory
+     */
+    virtual uint8_t *lock() = 0;
+    /** Interface to be implemented by the child class to unlock the memory allocation after the CPU is done accessing it. */
+    virtual void unlock() = 0;
+
+private:
+    size_t   _num_elements; /**< Number of elements allocated */
+    DataType _data_type;    /**< Data type of LUT elements. */
+};
+}
+#endif /* __ARM_COMPUTE_ILUTALLOCATOR_H__ */
diff --git a/arm_compute/runtime/IScheduler.h b/arm_compute/runtime/IScheduler.h
new file mode 100644
index 0000000000..39c027c6b7
--- /dev/null
+++ b/arm_compute/runtime/IScheduler.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_ISCHEDULER_H__
+#define __ARM_COMPUTE_ISCHEDULER_H__
+
+namespace arm_compute
+{
+class ICPPKernel;
+
+/** Scheduler interface to run kernels */
+class IScheduler
+{
+public:
+    /** Destructor. */
+    virtual ~IScheduler() = default;
+    /** Sets the number of threads the scheduler will use to run the kernels.
+     *
+     * @param[in] num_threads If set to 0, then one thread per CPU core available on the system will be used, otherwise the number of threads specified.
+     */
+    virtual void set_num_threads(unsigned int num_threads) = 0;
+    /** Returns the number of threads that the scheduler has in its pool.
+     *
+     * @return Number of threads available in the scheduler.
+     */
+    virtual unsigned int num_threads() const = 0;
+    /** Runs the kernel in the same thread as the caller synchronously.
+     *
+     * @param[in] kernel          Kernel to execute.
+     * @param[in] split_dimension Dimension along which to split the kernel's execution window.
+     */
+    virtual void schedule(ICPPKernel *kernel, unsigned int split_dimension) = 0;
+};
+}
+#endif /* __ARM_COMPUTE_ISCHEDULER_H__ */
diff --git a/arm_compute/runtime/ITensorAllocator.h b/arm_compute/runtime/ITensorAllocator.h
new file mode 100644
index 0000000000..6103e436bc
--- /dev/null
+++ b/arm_compute/runtime/ITensorAllocator.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_ITENSORALLOCATOR_H__
+#define __ARM_COMPUTE_ITENSORALLOCATOR_H__
+
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+/** Interface to allocate tensors */
+class ITensorAllocator
+{
+public:
+    /** Default constructor. */
+    ITensorAllocator();
+    /** Allow instances of this class to be copy constructed */
+    ITensorAllocator(const ITensorAllocator &) = default;
+    /** Allow instances of this class to be copied */
+    ITensorAllocator &operator=(const ITensorAllocator &) = default;
+    /** Allow instances of this class to be move constructed */
+    ITensorAllocator(ITensorAllocator &&) = default;
+    /** Allow instances of this class to be moved */
+    ITensorAllocator &operator=(ITensorAllocator &&) = default;
+    /** Default virtual destructor. */
+    virtual ~ITensorAllocator() = default;
+
+    /** Initialize a tensor based on the passed @ref TensorInfo.
+     *
+     * @param[in] input TensorInfo object containing the description of the tensor to initialize.
+     */
+    void init(const TensorInfo &input);
+    /** Return a reference to the tensor's metadata
+     *
+     * @return Reference to the tensor's metadata.
+     */
+    TensorInfo &info();
+    /** Return a constant reference to the tensor's metadata
+     *
+     * @return Constant reference to the tensor's metadata.
+     */
+    const TensorInfo &info() const;
+
+    /** Interface to be implemented by the child class to allocate the tensor.
+     *
+     * @note The child is expected to use the TensorInfo to get the size of the memory allocation.
+     * @warning The tensor must not already be allocated. Otherwise calling the function will fail.
+     */
+    virtual void allocate() = 0;
+
+    /** Interface to be implemented by the child class to free the allocated tensor.
+     *
+     * @warning The tensor must have been allocated previously. Otherwise calling the function will fail.
+     */
+    virtual void free() = 0;
+
+protected:
+    /** Interface to be implemented by the child class to lock the memory allocation for the CPU to access.
+     *
+     * @return Pointer to a CPU mapping of the memory
+     */
+    virtual uint8_t *lock() = 0;
+    /** Interface to be implemented by the child class to unlock the memory allocation after the CPU is done accessing it. */
+    virtual void unlock() = 0;
+
+private:
+    TensorInfo _info; /**< Tensor's metadata. */
+};
+}
+#endif /*__ARM_COMPUTE_ITENSORALLOCATOR_H__ */
diff --git a/arm_compute/runtime/Lut.h b/arm_compute/runtime/Lut.h
new file mode 100644
index 0000000000..87431feee4
--- /dev/null
+++ b/arm_compute/runtime/Lut.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_LUT_H__
+#define __ARM_COMPUTE_LUT_H__
+
+#include "arm_compute/core/ILut.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/LutAllocator.h"
+
+#include <cstddef>
+#include <cstdint>
+
+namespace arm_compute
+{
+class ILutAllocator;
+
+/** Basic implementation of the LUT interface */
+class Lut : public ILut
+{
+public:
+    /** Constructor */
+    Lut();
+    /** Constructor: initializes a LUT which can contain num_elements elements of data_type type.
+     *
+     * @param[in] num_elements Number of elements of the LUT.
+     * @param[in] data_type    Data type of each element.
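+     *
+     * A minimal usage sketch (assuming, as the allocator's init() implies, that the
+     * table storage is usable right after construction):
+     * @code
+     * Lut lut(256, DataType::U8); // 256-entry 8-bit table
+     * uint8_t *table = lut.buffer();
+     * for(size_t i = 0; i < lut.num_elements(); ++i)
+     * {
+     *     table[i] = 255 - i; // e.g. an inversion table
+     * }
+     * @endcode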
+ */ + Lut(size_t num_elements, DataType data_type); + /** Return a pointer to the LUT's allocator + * + * @return A pointer to the LUT's allocator + */ + ILutAllocator *allocator(); + + // Inherited methods overridden: + size_t num_elements() const override; + uint32_t index_offset() const override; + size_t size_in_bytes() const override; + DataType type() const override; + uint8_t *buffer() const override; + void clear() override; + +private: + LutAllocator _allocator; /**< Instance of the basic CPU allocator.*/ +}; +} +#endif /* __ARM_COMPUTE_LUT_H__ */ diff --git a/arm_compute/runtime/LutAllocator.h b/arm_compute/runtime/LutAllocator.h new file mode 100644 index 0000000000..76b596bfa0 --- /dev/null +++ b/arm_compute/runtime/LutAllocator.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_LUTALLOCATOR_H__ +#define __ARM_COMPUTE_LUTALLOCATOR_H__ + +#include "arm_compute/runtime/ILutAllocator.h" + +#include <cstdint> +#include <memory> + +namespace arm_compute +{ +/** Basic implementation of a CPU memory LUT allocator. */ +class LutAllocator : public ILutAllocator +{ +public: + /** Default constructor. */ + LutAllocator(); + /** Return a pointer to the allocated data. */ + uint8_t *data() const; + +protected: + /** Allocate num_elements() * sizeof(type()) of CPU memory. */ + void allocate() override; + /** No-op for CPU memory + * + * @return A pointer to the beginning of the look-up table's allocation. + */ + uint8_t *lock() override; + /** No-op for CPU memory. */ + void unlock() override; + +private: + std::unique_ptr<uint8_t[]> _buffer; /**< CPU memory allocation. */ +}; +} +#endif /* __ARM_COMPUTE_LUTALLOCATOR_H__ */ diff --git a/arm_compute/runtime/MultiHOG.h b/arm_compute/runtime/MultiHOG.h new file mode 100644 index 0000000000..32bad70738 --- /dev/null +++ b/arm_compute/runtime/MultiHOG.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_MULTIHOG_H__ +#define __ARM_COMPUTE_MULTIHOG_H__ + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/IMultiHOG.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/HOG.h" + +#include <memory> + +namespace arm_compute +{ +/** CPU implementation of multi HOG data-object */ +class MultiHOG : public IMultiHOG +{ +public: + /** Constructor + * + * @param[in] num_models Number of HOG data objects to contain + */ + MultiHOG(size_t num_models); + + // Inherited methods overridden: + size_t num_models() const override; + IHOG *model(size_t index) override; + const IHOG *model(size_t index) const override; + +private: + size_t _num_models; /**< Number of HOG models */ + std::unique_ptr<HOG[]> _model; /**< Array of HOG models */ +}; +} + +#endif /* __ARM_COMPUTE_MULTIHOG_H__ */ diff --git a/arm_compute/runtime/MultiImage.h b/arm_compute/runtime/MultiImage.h new file mode 100644 index 0000000000..917e586ef8 --- /dev/null +++ b/arm_compute/runtime/MultiImage.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +#ifndef __ARM_COMPUTE_MULTIIMAGE_H__ +#define __ARM_COMPUTE_MULTIIMAGE_H__ + +#include "arm_compute/core/IMultiImage.h" +#include "arm_compute/core/MultiImageInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/Tensor.h" + +#include <array> + +namespace arm_compute +{ +class Coordinates; +class ITensor; +using IImage = ITensor; + +/** Basic implementation of the multi-planar image interface */ +class MultiImage : public IMultiImage +{ +public: + /** Constructor */ + MultiImage(); + /** Initialise the multi-planar image + * + * @param[in] width Width of the whole image + * @param[in] height Height of the whole image + * @param[in] format Format of the whole image + */ + void init(unsigned int width, unsigned int height, Format format); + /** Initialise the multi-planar image + * + * @note Uses a conservative padding strategy which fits all kernels. + * + * @param[in] width Width of the whole image + * @param[in] height Height of the whole image + * @param[in] format Format of the whole image + */ + void init_auto_padding(unsigned int width, unsigned int height, Format format); + /** Allocate a previously initialised multi-planar image + * + * @note The multi-planar image must not already be allocated when calling this function. + */ + void allocate(); + /** Create a subimage from an existing MultiImage. + * + * @param[in] image Multi-planar image providing the backing memory + * @param[in] coords Starting coordinates of the new image. Must lie within the parent image's dimensions + * @param[in] width The width of the subimage + * @param[in] height The height of the subimage + */ + void create_subimage(MultiImage *image, const Coordinates &coords, unsigned int width, unsigned int height); + + // Inherited methods overridden: + const MultiImageInfo *info() const override; + Image *plane(unsigned int index) override; + const Image *plane(unsigned int index) const override; + +private: + /** Init the multi-planar image + * + * @param[in] width Width of the whole image + * @param[in] height Height of the whole image + * @param[in] format Format of the whole image + * @param[in] auto_padding Specifies whether the image uses auto padding + */ + void internal_init(unsigned int width, unsigned int height, Format format, bool auto_padding); + + MultiImageInfo _info; /**< Instance of the multi-planar image's metadata */ + std::array<Image, 3> _plane; /**< Images backing each plane of the multi-planar image */ +}; +} +#endif /*__ARM_COMPUTE_MULTIIMAGE_H__ */ diff --git a/arm_compute/runtime/NEON/INESimpleFunction.h b/arm_compute/runtime/NEON/INESimpleFunction.h new file mode 100644 index 0000000000..6e000d8fd8 --- /dev/null +++ b/arm_compute/runtime/NEON/INESimpleFunction.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_INESIMPLEFUNCTION_H__ +#define __ARM_COMPUTE_INESIMPLEFUNCTION_H__ + +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "arm_compute/runtime/IFunction.h" + +#include <memory> + +namespace arm_compute +{ +/** Basic interface for functions which have a single NEON kernel */ +class INESimpleFunction : public IFunction +{ +public: + /** Constructor */ + INESimpleFunction(); + + // Inherited methods overridden: + void run() override final; + +protected: + std::unique_ptr<INEKernel> _kernel; /**< Kernel to run */ + NEFillBorderKernel _border_handler; /**< Kernel to handle image borders */ +}; +} +#endif /*__ARM_COMPUTE_INESIMPLEFUNCTION_H__ */ diff --git a/arm_compute/runtime/NEON/NEFunctions.h b/arm_compute/runtime/NEON/NEFunctions.h new file mode 100644 index 0000000000..daf76f3a87 --- /dev/null +++ b/arm_compute/runtime/NEON/NEFunctions.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +#ifndef __ARM_COMPUTE_NEFUNCTIONS_H__ +#define __ARM_COMPUTE_NEFUNCTIONS_H__ + +/* Header regrouping all the NEON functions */ +#include "arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h" +#include "arm_compute/runtime/NEON/functions/NEAccumulate.h" +#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" +#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" +#include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h" +#include "arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h" +#include "arm_compute/runtime/NEON/functions/NEBitwiseAnd.h" +#include "arm_compute/runtime/NEON/functions/NEBitwiseNot.h" +#include "arm_compute/runtime/NEON/functions/NEBitwiseOr.h" +#include "arm_compute/runtime/NEON/functions/NEBitwiseXor.h" +#include "arm_compute/runtime/NEON/functions/NEBox3x3.h" +#include "arm_compute/runtime/NEON/functions/NECannyEdge.h" +#include "arm_compute/runtime/NEON/functions/NEChannelCombine.h" +#include "arm_compute/runtime/NEON/functions/NEChannelExtract.h" +#include "arm_compute/runtime/NEON/functions/NEColorConvert.h" +#include "arm_compute/runtime/NEON/functions/NEConvolution.h" +#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h" +#include "arm_compute/runtime/NEON/functions/NEDepthConcatenate.h" +#include "arm_compute/runtime/NEON/functions/NEDepthConvert.h" +#include "arm_compute/runtime/NEON/functions/NEDerivative.h" +#include "arm_compute/runtime/NEON/functions/NEDilate.h" +#include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h" +#include "arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h" +#include "arm_compute/runtime/NEON/functions/NEErode.h" +#include "arm_compute/runtime/NEON/functions/NEFastCorners.h" +#include "arm_compute/runtime/NEON/functions/NEFillBorder.h" +#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h" +#include "arm_compute/runtime/NEON/functions/NEGEMM.h" +#include "arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h" +#include "arm_compute/runtime/NEON/functions/NEGEMMLowp.h" +#include "arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h" +#include "arm_compute/runtime/NEON/functions/NEGaussian3x3.h" +#include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h" +#include "arm_compute/runtime/NEON/functions/NEGaussianPyramid.h" +#include "arm_compute/runtime/NEON/functions/NEHOGDescriptor.h" +#include "arm_compute/runtime/NEON/functions/NEHOGDetector.h" +#include "arm_compute/runtime/NEON/functions/NEHOGGradient.h" +#include "arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h" +#include "arm_compute/runtime/NEON/functions/NEHarrisCorners.h" +#include "arm_compute/runtime/NEON/functions/NEHistogram.h" +#include "arm_compute/runtime/NEON/functions/NEIntegralImage.h" +#include "arm_compute/runtime/NEON/functions/NELaplacianPyramid.h" +#include "arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h" +#include "arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h" +#include "arm_compute/runtime/NEON/functions/NEMagnitude.h" +#include "arm_compute/runtime/NEON/functions/NEMeanStdDev.h" +#include "arm_compute/runtime/NEON/functions/NEMedian3x3.h" +#include "arm_compute/runtime/NEON/functions/NEMinMaxLocation.h" +#include "arm_compute/runtime/NEON/functions/NENonLinearFilter.h" +#include "arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h" +#include "arm_compute/runtime/NEON/functions/NENormalizationLayer.h" +#include "arm_compute/runtime/NEON/functions/NEOpticalFlow.h" +#include "arm_compute/runtime/NEON/functions/NEPhase.h" +#include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h" +#include "arm_compute/runtime/NEON/functions/NEPoolingLayer.h" +#include "arm_compute/runtime/NEON/functions/NERemap.h" +#include "arm_compute/runtime/NEON/functions/NEScale.h" +#include "arm_compute/runtime/NEON/functions/NEScharr3x3.h" +#include "arm_compute/runtime/NEON/functions/NESobel3x3.h" +#include "arm_compute/runtime/NEON/functions/NESobel5x5.h" +#include "arm_compute/runtime/NEON/functions/NESobel7x7.h" +#include "arm_compute/runtime/NEON/functions/NESoftmaxLayer.h" +#include "arm_compute/runtime/NEON/functions/NETableLookup.h" +#include "arm_compute/runtime/NEON/functions/NEThreshold.h" +#include "arm_compute/runtime/NEON/functions/NETranspose.h" +#include "arm_compute/runtime/NEON/functions/NEWarpAffine.h" +#include "arm_compute/runtime/NEON/functions/NEWarpPerspective.h" + +#endif /* __ARM_COMPUTE_NEFUNCTIONS_H__ */ diff --git a/arm_compute/runtime/NEON/NEScheduler.h b/arm_compute/runtime/NEON/NEScheduler.h new file mode 100644 index 0000000000..94c82b2f03 --- /dev/null +++ b/arm_compute/runtime/NEON/NEScheduler.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NESCHEDULER_H__ +#define __ARM_COMPUTE_NESCHEDULER_H__ + +#include "arm_compute/runtime/Scheduler.h" + +namespace arm_compute +{ +using NEScheduler = Scheduler; +} +#endif /*__ARM_COMPUTE_NESCHEDULER_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h b/arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h new file mode 100644 index 0000000000..266a27586a --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEABSOLUTEDIFFERENCE_H__ +#define __ARM_COMPUTE_NEABSOLUTEDIFFERENCE_H__ + +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run @ref NEAbsoluteDifferenceKernel + * + * @note The image data type for the inputs must be U8 or S16 + * @note The function also calculates the absolute difference when the two inputs have different data types + */ +class NEAbsoluteDifference : public INESimpleFunction +{ +public: + /** Set the inputs and output images + * + * @param[in] input1 Source tensor. Data types supported: U8/S16. + * @param[in] input2 Source tensor. Data types supported: U8/S16. + * @param[out] output Destination tensor. Data types supported: U8/S16. + */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output); +}; +} +#endif /* __ARM_COMPUTE_NEABSOLUTEDIFFERENCE_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEAccumulate.h b/arm_compute/runtime/NEON/functions/NEAccumulate.h new file mode 100644 index 0000000000..de532c37a0 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEAccumulate.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEACCUMULATE_H__ +#define __ARM_COMPUTE_NEACCUMULATE_H__ + +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +#include <cstdint> + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run @ref NEAccumulateKernel */ +class NEAccumulate : public INESimpleFunction +{ +public: + /** Set the input and accumulation tensors + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output Destination tensor. Data type supported: S16.
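+ * + * Illustrative sketch (assumes pre-allocated tensors src (U8) and accum (S16) of matching shape): + * @code + * NEAccumulate acc; + * acc.configure(&src, &accum); + * acc.run(); // accum += src + * @endcode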
+ */ + void configure(const ITensor *input, ITensor *output); +}; + +/** Basic function to run @ref NEAccumulateWeightedKernel */ +class NEAccumulateWeighted : public INESimpleFunction +{ +public: + /** Set the input and accumulation tensors, and the scale value + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[in] alpha The input scalar value with a value in the range of [0, 1.0] + * @param[in,out] output Accumulated tensor. Data type supported: U8. + * @param[in] use_fp16 (Optional) If true the FP16 kernels will be used. If false F32 kernels are used. + */ + void configure(const ITensor *input, float alpha, ITensor *output, bool use_fp16 = false); +}; + +/** Basic function to run @ref NEAccumulateSquaredKernel */ +class NEAccumulateSquared : public INESimpleFunction +{ +public: + /** Set the input and accumulation tensors and the shift value. + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[in] shift The input shift value, in the range of [0, 15] + * @param[in,out] output Accumulated tensor. Data type supported: S16. + */ + void configure(const ITensor *input, uint32_t shift, ITensor *output); +}; +} +#endif /*__ARM_COMPUTE_NEACCUMULATE_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEActivationLayer.h b/arm_compute/runtime/NEON/functions/NEActivationLayer.h new file mode 100644 index 0000000000..35366e16fb --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEActivationLayer.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEACTIVATIONLAYER_H__ +#define __ARM_COMPUTE_NEACTIVATIONLAYER_H__ + +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run @ref NEActivationLayerKernel + * + * @note The function simulates an activation layer with the specified activation function. + */ +class NEActivationLayer : public INESimpleFunction +{ +public: + /** Set the input and output tensor. + * + * @param[in] input Source tensor. Data type supported: QS8/F32. + * @param[out] output Destination tensor. Data type supported: same as @p input + * @param[in] activation_info Activation layer parameters.
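+ * + * Illustrative sketch (assumes pre-allocated F32 tensors src and dst of matching shape): + * @code + * NEActivationLayer act; + * act.configure(&src, &dst, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + * act.run(); + * @endcode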
+ */ + void configure(const ITensor *input, ITensor *output, ActivationLayerInfo activation_info); +}; +} +#endif /* __ARM_COMPUTE_NEACTIVATIONLAYER_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h new file mode 100644 index 0000000000..8e34e983c7 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEARITHMETICADDITION_H__ +#define __ARM_COMPUTE_NEARITHMETICADDITION_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run @ref NEArithmeticAdditionKernel */ +class NEArithmeticAddition : public INESimpleFunction +{ +public: + /** Initialise the kernel's inputs, output and conversion policy. + * + * @param[in] input1 First tensor input. Data types supported: U8/S16. + * @param[in] input2 Second tensor input. Data types supported: U8/S16. + * @param[out] output Output tensor. Data types supported: U8/S16. + * @param[in] policy Policy to use to handle overflow. + */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy); +}; +} +#endif /*__ARM_COMPUTE_NEARITHMETICADDITION_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h b/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h new file mode 100644 index 0000000000..841b5912b9 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEARITHMETICSUBTRACTION_H__ +#define __ARM_COMPUTE_NEARITHMETICSUBTRACTION_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run @ref NEArithmeticSubtractionKernel */ +class NEArithmeticSubtraction : public INESimpleFunction +{ +public: + /** Initialise the kernel's inputs, output and conversion policy. + * + * @param[in] input1 First tensor input. Data types supported: U8/S16. + * @param[in] input2 Second tensor input. Data types supported: U8/S16. + * @param[out] output Output tensor. Data types supported: U8/S16. + * @param[in] policy Policy to use to handle overflow. + */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy); +}; +} +#endif /* __ARM_COMPUTE_NEARITHMETICSUBTRACTION_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h b/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h new file mode 100644 index 0000000000..b0b5c122cb --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEBATCHNORMALIZATIONLAYER_H__ +#define __ARM_COMPUTE_NEBATCHNORMALIZATIONLAYER_H__ + +#include "arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run @ref NEBatchNormalizationLayerKernel and simulate a batch normalization layer.
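+ * + * Illustrative configuration sketch (tensors assumed pre-allocated; the epsilon value is an arbitrary example): + * @code + * NEBatchNormalizationLayer bn; + * bn.configure(&src, &dst, &mean, &var, &beta, &gamma, 0.001f); + * bn.run(); + * @endcode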
+ * + * Batch normalization is calculated by: + * @f[ out_i = \gamma * (\frac{in_i - \mu_{B}}{\sqrt{\sigma^2_{B} + \epsilon}}) + \beta \equiv BN_{\gamma,\beta}(in_i) @f] + * + */ +class NEBatchNormalizationLayer : public IFunction +{ +public: + /** Default constructor */ + NEBatchNormalizationLayer(); + /** Set the input and output tensors. + * + * @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, FM]. + * The rest are optional and used for representing batches. Data types supported: QS8/F32. + * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input + * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] beta Beta values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] gamma Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] epsilon Small value to avoid division by zero. + */ + void configure(const ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma, float epsilon); + + // Inherited methods overridden: + void run() override; + +private: + NEBatchNormalizationLayerKernel _norm_kernel; /**< Batch normalization layer kernel */ +}; +} +#endif /* __ARM_COMPUTE_NEBATCHNORMALIZATIONLAYER_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h b/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h new file mode 100644 index 0000000000..0250293e97 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEBITWISEAND_H__ +#define __ARM_COMPUTE_NEBITWISEAND_H__ + +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run @ref NEBitwiseAndKernel */ +class NEBitwiseAnd : public INESimpleFunction +{ +public: + /** Initialise the kernel's inputs and output + * + * @param[in] input1 First tensor input. Data type supported: U8.
+ * @param[in] input2 Second tensor input. Data type supported: U8. + * @param[out] output Output tensor. Data type supported: U8. + */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output); +}; +} +#endif /* __ARM_COMPUTE_NEBITWISEAND_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseNot.h b/arm_compute/runtime/NEON/functions/NEBitwiseNot.h new file mode 100644 index 0000000000..62c08ffcf9 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEBitwiseNot.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEBITWISENOT_H__ +#define __ARM_COMPUTE_NEBITWISENOT_H__ + +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run @ref NEBitwiseNotKernel */ +class NEBitwiseNot : public INESimpleFunction +{ +public: + /** Initialise the kernel's input and output + * + * @param[in] input Input tensor. Data type supported: U8. + * @param[out] output Output tensor. Data type supported: U8. + */ + void configure(const ITensor *input, ITensor *output); +}; +} +#endif /* __ARM_COMPUTE_NEBITWISENOT_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseOr.h b/arm_compute/runtime/NEON/functions/NEBitwiseOr.h new file mode 100644 index 0000000000..1c9a2f9d2e --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEBitwiseOr.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEBITWISEOR_H__ +#define __ARM_COMPUTE_NEBITWISEOR_H__ + +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run @ref NEBitwiseOrKernel */ +class NEBitwiseOr : public INESimpleFunction +{ +public: + /** Initialise the kernel's inputs and output + * + * @param[in] input1 First tensor input. Data type supported: U8. + * @param[in] input2 Second tensor input. Data type supported: U8. + * @param[out] output Output tensor. Data type supported: U8. + */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output); +}; +} +#endif /* __ARM_COMPUTE_NEBITWISEOR_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseXor.h b/arm_compute/runtime/NEON/functions/NEBitwiseXor.h new file mode 100644 index 0000000000..4690f0a4e3 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEBitwiseXor.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEBITWISEXOR_H__ +#define __ARM_COMPUTE_NEBITWISEXOR_H__ + +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run @ref NEBitwiseXorKernel */ +class NEBitwiseXor : public INESimpleFunction +{ +public: + /** Initialise the kernel's inputs and output + * + * @param[in] input1 First tensor input. Data type supported: U8. + * @param[in] input2 Second tensor input. Data type supported: U8. + * @param[out] output Output tensor. Data type supported: U8. + */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output); +}; +} +#endif /* __ARM_COMPUTE_NEBITWISEXOR_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEBox3x3.h b/arm_compute/runtime/NEON/functions/NEBox3x3.h new file mode 100644 index 0000000000..2b5440a74c --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEBox3x3.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEBOX3x3_H__ +#define __ARM_COMPUTE_NEBOX3x3_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +#include <cstdint> + +namespace arm_compute +{ +class ITensor; + +/** Basic function to execute a 3x3 box filter. This function calls the following NEON kernels: + * + * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref NEBox3x3Kernel + * + */ +class NEBox3x3 : public INESimpleFunction +{ +public: + /** Initialise the function's input, output and border mode. + * + * @note The border handler is run on the input tensor. + * + * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor. Data type supported: U8. + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + * @param[in] use_fp16 (Optional) If true the FP16 kernels will be used. If false F32 kernels are used. + */ + void configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value = 0, bool use_fp16 = false); +}; +} +#endif /*__ARM_COMPUTE_NEBOX3x3_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NECannyEdge.h b/arm_compute/runtime/NEON/functions/NECannyEdge.h new file mode 100644 index 0000000000..fbf2d90740 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NECannyEdge.h @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NECANNYEDGE_H__ +#define __ARM_COMPUTE_NECANNYEDGE_H__ + +#include "arm_compute/core/NEON/kernels/NECannyEdgeKernel.h" +#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/Tensor.h" + +#include <cstdint> +#include <memory> + +namespace arm_compute +{ +class ITensor; + +/** Basic function to execute the Canny edge algorithm on NEON. This function calls the following NEON kernels and functions: + * + * -# @ref NEFillBorderKernel (if border_mode == REPLICATE or border_mode == CONSTANT) + * -# @ref NESobel3x3 (if gradient_size == 3) or + * @ref NESobel5x5 (if gradient_size == 5) or + * @ref NESobel7x7 (if gradient_size == 7) + * -# @ref NEGradientKernel + * -# @ref NEEdgeNonMaxSuppressionKernel + * -# @ref NEEdgeTraceKernel + * + */ +class NECannyEdge : public IFunction +{ +public: + /** Constructor + * + * Initialize Sobel kernel to nullptr. + */ + NECannyEdge(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NECannyEdge(const NECannyEdge &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NECannyEdge &operator=(const NECannyEdge &) = delete; + /** Initialise the function's source, destination, thresholds, gradient size, normalization type and border mode. + * + * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor. Data type supported: U8. + * @param[in] upper_thr Upper threshold used for the hysteresis + * @param[in] lower_thr Lower threshold used for the hysteresis. + * @param[in] gradient_size Gradient size (3, 5 or 7) + * @param[in] norm_type Normalization type. If 1, L1-Norm; otherwise L2-Norm + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + * @param[in] use_fp16 (Optional) If true the FP16 kernels will be used. If false F32 kernels are used.
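+ * + * Illustrative sketch (assumes pre-allocated U8 tensors src and dst; the threshold values are arbitrary examples): + * @code + * NECannyEdge canny; + * canny.configure(&src, &dst, 50, 20, 3, 1, BorderMode::REPLICATE); // thresholds 50/20, 3x3 Sobel, L1 norm + * canny.run(); + * @endcode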
+ * + */ + void configure(ITensor *input, ITensor *output, int32_t upper_thr, int32_t lower_thr, int32_t gradient_size, int32_t norm_type, BorderMode border_mode, uint8_t constant_border_value = 0, + bool use_fp16 = false); + + // Inherited methods overridden: + void run() override; + +private: + std::unique_ptr<IFunction> _sobel; /**< Pointer to Sobel function */ + std::unique_ptr<INEKernel> _gradient; /**< Gradient kernel */ + NEEdgeNonMaxSuppressionKernel _non_max_suppr; /**< Non-Maxima suppression kernel */ + NEEdgeTraceKernel _edge_trace; /**< Edge tracing kernel */ + NEFillBorderKernel _border_mag_gradient; /**< Fill border on magnitude tensor kernel */ + NEFillBorderKernel _border_edge_trace; /**< Fill border before edge trace */ + Tensor _gx; /**< Intermediate tensor - Gx component */ + Tensor _gy; /**< Intermediate tensor - Gy component */ + Tensor _magnitude; /**< Intermediate tensor - Magnitude */ + Tensor _phase; /**< Intermediate tensor - Phase */ + Tensor _nonmax; /**< Intermediate tensor - Non-Maxima suppressed */ + ITensor *_output; /**< Output tensor provided by the user. */ +}; +} +#endif /* __ARM_COMPUTE_NECANNYEDGE_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEChannelCombine.h b/arm_compute/runtime/NEON/functions/NEChannelCombine.h new file mode 100644 index 0000000000..7133553e1d --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEChannelCombine.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NECHANNELCOMBINE_H__ +#define __ARM_COMPUTE_NECHANNELCOMBINE_H__ + +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +namespace arm_compute +{ +class IMultiImage; +class ITensor; +using IImage = ITensor; + +/** Basic function to run @ref NEChannelCombineKernel to perform channel combination. */ +class NEChannelCombine : public INESimpleFunction +{ +public: + /** Initialize function's inputs and outputs. + * + * @param[in] plane0 The 2D plane that forms channel 0. Data type supported: U8 + * @param[in] plane1 The 2D plane that forms channel 1. Data type supported: U8 + * @param[in] plane2 The 2D plane that forms channel 2. Data type supported: U8 + * @param[in] plane3 The 2D plane that forms channel 3. Data type supported: U8 + * @param[out] output The single-planar output tensor.
Formats supported: RGB888/RGBA8888/UYVY422/YUYV422 + */ + void configure(const ITensor *plane0, const ITensor *plane1, const ITensor *plane2, const ITensor *plane3, ITensor *output); + /** Initialize function's inputs and outputs. + * + * @param[in] plane0 The 2D plane that forms channel 0. Data type supported: U8 + * @param[in] plane1 The 2D plane that forms channel 1. Data type supported: U8 + * @param[in] plane2 The 2D plane that forms channel 2. Data type supported: U8 + * @param[out] output The multi-planar output image. Formats supported: NV12/NV21/IYUV/YUV444 + */ + void configure(const IImage *plane0, const IImage *plane1, const IImage *plane2, IMultiImage *output); +}; +} +#endif /*__ARM_COMPUTE_NECHANNELCOMBINE_H__*/ diff --git a/arm_compute/runtime/NEON/functions/NEChannelExtract.h b/arm_compute/runtime/NEON/functions/NEChannelExtract.h new file mode 100644 index 0000000000..5e46eef3a6 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEChannelExtract.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NECHANNELEXTRACT_H__ +#define __ARM_COMPUTE_NECHANNELEXTRACT_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +namespace arm_compute +{ +class IMultiImage; +class ITensor; +using IImage = ITensor; + +/** Basic function to run @ref NEChannelExtractKernel to perform channel extraction. */ +class NEChannelExtract : public INESimpleFunction +{ +public: + /** Initialize the function's source and destination + * + * @param[in] input The input tensor to extract the channel from. Formats supported: Any single planar. + * @param[in] channel The channel to extract. + * @param[out] output The extracted channel. Format supported: U8 + */ + void configure(const ITensor *input, Channel channel, ITensor *output); + /** Initialize the function's source and destination + * + * @param[in] input The multi-planar input image to extract channel from. + * @param[in] channel The channel to extract. + * @param[out] output The extracted channel.
Format supported: U8 + */ + void configure(const IMultiImage *input, Channel channel, IImage *output); +}; +} +#endif /*__ARM_COMPUTE_NECHANNELEXTRACT_H__*/ diff --git a/arm_compute/runtime/NEON/functions/NEColorConvert.h b/arm_compute/runtime/NEON/functions/NEColorConvert.h new file mode 100644 index 0000000000..2997778ed5 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEColorConvert.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NECOLORCONVERT_H__ +#define __ARM_COMPUTE_NECOLORCONVERT_H__ + +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +namespace arm_compute +{ +class ITensor; +class IMultiImage; +using IImage = ITensor; + +/** Basic function to run @ref NEColorConvertKernel to perform color conversion. */ +class NEColorConvert : public INESimpleFunction +{ +public: + /** Initialize the function's source and destination + * + * @param[in] input The input single-planar tensor from which to convert + * @param[out] output The converted single-planar output tensor + */ + void configure(const ITensor *input, ITensor *output); + /** Initialize the function's source and destination + * + * @param[in] input The multi-planar input image from which to convert + * @param[out] output The converted single-planar output image + */ + void configure(const IMultiImage *input, IImage *output); + /** Initialize the function's source and destination + * + * @param[in] input The single-planar input image from which to convert + * @param[out] output The converted multi-planar output image + */ + void configure(const IImage *input, IMultiImage *output); + /** Initialize the function's source and destination + * + * @param[in] input The multi-planar input image from which to convert + * @param[out] output The converted multi-planar output image + */ + void configure(const IMultiImage *input, IMultiImage *output); +}; +} +#endif /*__ARM_COMPUTE_NECOLORCONVERT_H__*/ diff --git a/arm_compute/runtime/NEON/functions/NEConvolution.h b/arm_compute/runtime/NEON/functions/NEConvolution.h new file mode 100644 index 0000000000..1704d9fa94 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEConvolution.h @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited.
+};
+}
+#endif /*__ARM_COMPUTE_NECOLORCONVERT_H__*/
diff --git a/arm_compute/runtime/NEON/functions/NEConvolution.h b/arm_compute/runtime/NEON/functions/NEConvolution.h
new file mode 100644
index 0000000000..1704d9fa94
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEConvolution.h
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NECONVOLUTION_H__
+#define __ARM_COMPUTE_NECONVOLUTION_H__
+
+#include "arm_compute/core/NEON/kernels/NEConvolutionKernel.h"
+#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/Tensor.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to execute convolution of size 3x3. This function calls the following NEON kernels:
+ *
+ * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
+ * -# @ref NEConvolution3x3Kernel
+ *
+ */
+class NEConvolution3x3 : public INESimpleFunction
+{
+public:
+    /** Initialize the function's source, destination, conv and border_mode.
+     *
+     * @param[in,out] input                 Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output                Destination tensor. Data types supported: U8/S16.
+     * @param[in]     conv                  matrix_size x matrix_size S16 coefficients structured as a row-major 2D array in a linear buffer.
+     * @param[in]     scale                 Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
+     * @param[in]     border_mode           Strategy to use for borders.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0);
+};
+
+/** Basic function to execute convolution of size 5x5, 7x7 or 9x9. This function calls the following NEON kernels:
+ *
+ * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
+ * -# @ref NEConvolutionKernel or
+ *    @ref NESeparableConvolutionHorKernel and @ref NESeparableConvolutionVertKernel (if the convolution matrix is separable)
+ *
+ */
+template <unsigned int matrix_size>
+class NEConvolutionSquare : public IFunction
+{
+public:
+    /** Default constructor */
+    NEConvolutionSquare();
+    /** Initialize the function's source, destination, conv and border_mode.
+     *
+     * @param[in,out] input                 Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output                Destination tensor. Data types supported: U8 or S16.
+     * @param[in]     conv                  matrix_size x matrix_size S16 coefficients structured as a row-major 2D array in a linear buffer.
+     * @param[in]     scale                 Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
+     * @param[in]     border_mode           Strategy to use for borders.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    Tensor                                        _tmp;            /**< temporary buffer for output of horizontal pass */
+    bool                                          _is_separable;   /**< true if the convolution can be separated */
+    NESeparableConvolutionHorKernel<matrix_size>  _kernel_hor;     /**< kernel for horizontal pass of separated convolution */
+    NESeparableConvolutionVertKernel<matrix_size> _kernel_vert;    /**< kernel for vertical pass of separated convolution */
+    NEConvolutionKernel<matrix_size>              _kernel;         /**< kernel for non-separated convolution */
+    NEFillBorderKernel                            _border_handler; /**< kernel for border handling */
+};
+
+/** Basic function to run 5x5 convolution. */
+using NEConvolution5x5 = NEConvolutionSquare<5>;
+/** Basic function to run 7x7 convolution. */
+using NEConvolution7x7 = NEConvolutionSquare<7>;
+/** Basic function to run 9x9 convolution. */
+using NEConvolution9x9 = NEConvolutionSquare<9>;
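+
+/* A usage sketch for NEConvolution5x5 with placeholder tensors (allocation
+ * elided). Passing scale = 0 lets the function use the coefficient sum; the
+ * Gaussian weights below sum to 256, so the output is divided by 256:
+ *
+ *   const int16_t gauss5x5[25] = {
+ *       1,  4,  6,  4, 1,
+ *       4, 16, 24, 16, 4,
+ *       6, 24, 36, 24, 6,
+ *       4, 16, 24, 16, 4,
+ *       1,  4,  6,  4, 1
+ *   };
+ *   NEConvolution5x5 conv5x5;
+ *   conv5x5.configure(&src, &dst, gauss5x5, 0, BorderMode::UNDEFINED);
+ *   conv5x5.run();
+ */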
+
+/** Basic function to execute non-square convolution. This function calls the following NEON kernels:
+ *
+ * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
+ * -# @ref NEConvolutionRectangleKernel
+ *
+ * @note The convolution rectangle should have dimensions of 3, 5, 7 or 9
+ */
+class NEConvolutionRectangle : public INESimpleFunction
+{
+public:
+    /** Initialize the function's source, destination, conv and border_mode.
+     *
+     * @param[in,out] input                 Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output                Destination tensor. Data types supported: U8 or S16.
+     * @param[in]     conv                  rows x cols S16 coefficients structured as a row-major 2D array in a linear buffer.
+     * @param[in]     rows                  Rows of the convolution kernel.
+     * @param[in]     cols                  Columns of the convolution kernel.
+     * @param[in]     scale                 Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
+     * @param[in]     border_mode           Strategy to use for borders.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t rows, uint32_t cols, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0);
+};
+}
+#endif /*__ARM_COMPUTE_NECONVOLUTION_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h
new file mode 100644
index 0000000000..a8fff8d047
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NECONVOLUTIONLAYER_H__
+#define __ARM_COMPUTE_NECONVOLUTIONLAYER_H__
+
+#include "arm_compute/runtime/IFunction.h"
+
+#include "arm_compute/core/NEON/kernels/NECol2ImKernel.h"
+#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
+#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
+#include "arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h"
+#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
+#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h"
+#include "arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/Tensor.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Function to reshape and perform 1xW transposition on the weights. This function calls the following kernels:
+ * -# @ref NEWeightsReshapeKernel
+ * -# @ref NEGEMMTranspose1xWKernel (executed in case GEMM is required for the operation)
+ */
+class NEConvolutionLayerReshapeWeights : public IFunction
+{
+public:
+    /** Constructor */
+    NEConvolutionLayerReshapeWeights();
+    /** Set the input and output tensors.
+     *
+     * @param[in]  weights      Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: QS8/F32.
+     * @param[in]  biases       Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights.
+     * @param[out] output       Destination tensor. Data types supported: Same as @p weights.
+     * @param[in]  transpose1xW True if the weights are to undergo a 1xW transposition after reshaping (in case of GEMM operation), false otherwise.
+     */
+    void configure(const ITensor *weights, const ITensor *biases, ITensor *output, bool transpose1xW);
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    NEWeightsReshapeKernel   _weights_reshape_kernel;
+    NEGEMMTranspose1xWKernel _weights_transposed_kernel;
+    Tensor                   _weights_reshaped;
+    bool                     _transpose1xW;
+};
+
+/** Basic function to compute a convolution layer. This function calls the following NEON kernels:
+ * -# @ref NEWeightsReshapeKernel (executed only once for each configuration)
+ * -# @ref NEIm2ColKernel
+ * -# @ref NEGEMMInterleave4x4Kernel (executed only in case GEMM is required for the operation)
+ * -# @ref NEGEMMMatrixMultiplyKernel
+ * -# @ref NECol2ImKernel
+ */
+class NEConvolutionLayer : public IFunction
+{
+public:
+    /** Constructor */
+    NEConvolutionLayer();
+    /** Set the input and output tensors.
+     *
+     * @param[in]  input        Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
+     *                          while every optional dimension from 4 and above represent a batch of inputs.
+     *                          Data types supported: QS8/F32.
+     * @param[in]  weights      Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
+     * @param[in]  biases       Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+     * @param[out] output       Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
+     *                          Data types supported: Same as @p input.
+     * @param[in]  conv_info    Contains padding and stride information described in @ref PadStrideInfo.
+     * @param[in]  weights_info Specifies whether the weights tensor has been reshaped with NEWeightsReshapeKernel and, if it is not part of
+     *                          the fully connected layer, whether it has also been transposed with NEGEMMTranspose1xWKernel.
+     */
+    void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo());
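+
+    /* A minimal configuration sketch with placeholder tensors (shapes and
+     * allocation elided); PadStrideInfo(1, 1, 1, 1) describes stride 1 in x/y
+     * with one pixel of padding, as for a 3x3 kernel preserving width/height:
+     *
+     *   NEConvolutionLayer conv;
+     *   conv.configure(&src, &weights, &biases, &dst, PadStrideInfo(1, 1, 1, 1));
+     *   conv.run();
+     */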
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    NEIm2ColKernel                   _input_im2col_kernel;
+    NEGEMMInterleave4x4Kernel        _input_interleave_kernel;
+    NEConvolutionLayerReshapeWeights _reshape_weights;
+    NEGEMMMatrixMultiplyKernel       _mm_kernel;
+    NECol2ImKernel                   _output_col2im_kernel;
+    Tensor                           _input_im2col_reshaped;
+    Tensor                           _input_interleaved_reshaped;
+    Tensor                           _weights_reshaped;
+    Tensor                           _gemm_output;
+    bool                             _has_bias;
+    bool                             _is_fully_connected_convolution;
+    bool                             _are_weights_reshaped;
+};
+}
+#endif /* __ARM_COMPUTE_NECONVOLUTIONLAYER_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEDepthConcatenate.h b/arm_compute/runtime/NEON/functions/NEDepthConcatenate.h
new file mode 100644
index 0000000000..02ff1227c7
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEDepthConcatenate.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEDEPTHCONCATENATE_H__
+#define __ARM_COMPUTE_NEDEPTHCONCATENATE_H__
+
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
+#include <vector>
+
+namespace arm_compute
+{
+class ITensor;
+class NEDepthConcatenateKernel;
+class NEFillBorderKernel;
+
+/** Basic function to concatenate tensors along the z axis. This function calls the following kernels:
+ *
+ * -# @ref NEFillBorderKernel (executed if input's lowest two dimensions are smaller than respective output's dimensions)
+ * -# @ref NEDepthConcatenateKernel
+ *
+ */
+class NEDepthConcatenate : public IFunction
+{
+public:
+    /** Default constructor */
+    NEDepthConcatenate();
+    /** Initialise the kernel's inputs vector and output.
+     *
+     * @param[in,out] inputs_vector The vector containing all the tensors to concatenate. Data types supported: F32.
+     * @param[out]    output        Output tensor. Data types supported: F32.
+     */
+    void configure(std::vector<ITensor *> inputs_vector, ITensor *output);
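+
+    /* A minimal usage sketch; t0, t1 and dst are placeholder F32 tensors and
+     * dst's z dimension is assumed to cover both inputs:
+     *
+     *   std::vector<ITensor *> inputs = { &t0, &t1 };
+     *   NEDepthConcatenate concat;
+     *   concat.configure(inputs, &dst);
+     *   concat.run();
+     */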
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    std::vector<ITensor *>                      _inputs_vector;
+    std::unique_ptr<NEDepthConcatenateKernel[]> _concat_kernels_vector;
+    std::unique_ptr<NEFillBorderKernel[]>       _border_handlers_vector;
+    unsigned int                                _num_inputs;
+};
+}
+#endif /* __ARM_COMPUTE_NEDEPTHCONCATENATE_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEDepthConvert.h b/arm_compute/runtime/NEON/functions/NEDepthConvert.h
new file mode 100644
index 0000000000..7c59ce432d
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEDepthConvert.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEDEPTHCONVERT_H__
+#define __ARM_COMPUTE_NEDEPTHCONVERT_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to run @ref NEDepthConvertKernel */
+class NEDepthConvert : public INESimpleFunction
+{
+public:
+    /** Constructor */
+    NEDepthConvert() = default;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEDepthConvert(const NEDepthConvert &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    const NEDepthConvert &operator=(const NEDepthConvert &) = delete;
+    /** Initialize the function's source and destination.
+     *
+     * The input format must be different from the output format.
+     *
+     * Valid conversions Input -> Output :
+     *   QS8 -> F32
+     *   U8  -> U16, S16, S32
+     *   U16 -> U8, U32
+     *   S16 -> U8, S32
+     *   F32 -> QS8
+     *
+     * @param[in]  input  The input tensor to convert. Data type supported: QS8/U8/U16/S16/F32.
+     * @param[out] output The output tensor. Data type supported: QS8/U8/U16/S16/U32/S32/F32.
+     * @param[in]  policy Conversion policy.
+     * @param[in]  shift  Value for down/up conversions. Must be 0 <= shift < 8.
+     *                    It is not used on fixed point conversion.
+     */
+    void configure(const ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift);
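+
+    /* A minimal usage sketch following the conversion table above; src_u8 and
+     * dst_s16 are placeholder tensors and the shift of 1 is illustrative only:
+     *
+     *   NEDepthConvert convert;
+     *   convert.configure(&src_u8, &dst_s16, ConvertPolicy::SATURATE, 1);
+     *   convert.run();
+     */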
+};
+}
+#endif /*__ARM_COMPUTE_NEDEPTHCONVERT_H__*/
diff --git a/arm_compute/runtime/NEON/functions/NEDerivative.h b/arm_compute/runtime/NEON/functions/NEDerivative.h
new file mode 100644
index 0000000000..57b7409b39
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEDerivative.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEDERIVATIVE_H__
+#define __ARM_COMPUTE_NEDERIVATIVE_H__
+
+#include "arm_compute/core/NEON/kernels/NEDerivativeKernel.h"
+#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to execute first order derivative operator. This function calls the following NEON kernels:
+ *
+ * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
+ * -# @ref NEDerivativeKernel
+ *
+ */
+class NEDerivative : public IFunction
+{
+public:
+    /** Default constructor */
+    NEDerivative();
+    /** Initialise the function's source, destinations and border mode.
+     *
+     * @note At least one of output_x or output_y must not be NULL.
+     *
+     * @param[in, out] input                 Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]     output_x              (Optional) Destination tensor. Derivative along the X direction. Data type supported: S16.
+     * @param[out]     output_y              (Optional) Destination tensor. Derivative along the Y direction. Data type supported: S16.
+     * @param[in]      border_mode           Border mode to use.
+     * @param[in]      constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    NEDerivativeKernel _kernel;         /**< Derivative kernel */
+    NEFillBorderKernel _border_handler; /**< Kernel to handle tensor borders */
+};
+}
+#endif /* __ARM_COMPUTE_NEDERIVATIVE_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEDilate.h b/arm_compute/runtime/NEON/functions/NEDilate.h
new file mode 100644
index 0000000000..17bdb3363e
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEDilate.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEDILATE_H__
+#define __ARM_COMPUTE_NEDILATE_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to execute dilate. This function calls the following NEON kernels:
+ *
+ * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
+ * -# @ref NEDilateKernel
+ *
+ */
+class NEDilate : public INESimpleFunction
+{
+public:
+    /** Initialise the kernel's inputs, output and border mode.
+     *
+     * @param[in, out] input                 First tensor input. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]     output                Output tensor. Data type supported: U8.
+     * @param[in]      border_mode           Border mode to use.
+     * @param[in]      constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
+};
+}
+#endif /*__ARM_COMPUTE_NEDILATE_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h
new file mode 100644
index 0000000000..a356cac7c8
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYER_H__
+#define __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYER_H__
+
+#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h"
+#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h"
+#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/Tensor.h"
+
+namespace arm_compute
+{
+/** Function to run the direct convolution.
+ *
+ * This function calls the following NEON kernels:
+ *
+ * -# @ref NEFillBorderKernel for the input
+ * -# @ref NEDirectConvolutionLayerBiasAccumulateKernel
+ * -# @ref NEDirectConvolutionLayerKernel
+ */
+class NEDirectConvolutionLayer : public IFunction
+{
+public:
+    /** Constructor */
+    NEDirectConvolutionLayer();
+    /** Set the input, weights, biases and output tensors.
+     *
+     * @param[in, out] input     Input tensor. Data types supported: QS8/F32.
+     * @param[in]      weights   Set of kernels to convolve the input volume.
+     *                           The 3rd dimension must be the same as the input's volume 3rd dimension.
+     *                           Data type supported: Same as @p input.
+     * @param[in]      bias      Set of biases. Data type supported: Same as @p input.
+     * @param[out]     output    Output tensor.
+     *                           The 3rd dimension must be equal to the 4th dimension of the @p weights tensor. Data types supported: Same as @p input.
+     * @param[in]      conv_info Contains padding and stride information described in @ref PadStrideInfo.
+     */
+    void configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output, const PadStrideInfo &conv_info);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    NEDirectConvolutionLayerBiasAccumulateKernel _accumulate_bias_kernel;
+    NEDirectConvolutionLayerKernel               _conv_kernel;
+    NEFillBorderKernel                           _input_border_handler;
+    Tensor                                       _accumulator;
+};
+}
+#endif /* __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYER_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h b/arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h
new file mode 100644
index 0000000000..6cf8008480
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEEQUALIZEHISTOGRAM_H__
+#define __ARM_COMPUTE_NEEQUALIZEHISTOGRAM_H__
+
+#include "arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h"
+#include "arm_compute/core/NEON/kernels/NEHistogramKernel.h"
+#include "arm_compute/core/NEON/kernels/NETableLookupKernel.h"
+#include "arm_compute/runtime/Distribution1D.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/Lut.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ITensor;
+using IImage = ITensor;
+
+/** Basic function to execute histogram equalization. This function calls the following NEON kernels:
+ *
+ * -# @ref NEHistogramKernel
+ * -# @ref NECumulativeDistributionKernel
+ * -# @ref NETableLookupKernel
+ *
+ */
+class NEEqualizeHistogram : public IFunction
+{
+public:
+    /** Default Constructor. */
+    NEEqualizeHistogram();
+    /** Initialise the kernel's inputs.
+     *
+     * @note Currently the width of the input image must be a multiple of 16.
+     *
+     * @param[in]  input  Input image. Data type supported: U8.
+     * @param[out] output Output image. Data type supported: same as @p input
+     */
+    void configure(const IImage *input, IImage *output);
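+
+    /* A minimal usage sketch; src and dst are placeholder U8 images whose width
+     * is a multiple of 16, as the note above requires:
+     *
+     *   NEEqualizeHistogram eq_hist;
+     *   eq_hist.configure(&src, &dst);
+     *   eq_hist.run();
+     */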
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    NEHistogramKernel              _histogram_kernel;    /**< Kernel that calculates the histogram of input. */
+    NECumulativeDistributionKernel _cd_histogram_kernel; /**< Kernel that calculates the cumulative distribution
+                                                              and creates the relevant lookup table. */
+    NETableLookupKernel            _map_histogram_kernel; /**< Kernel that maps the input to output using the LUT. */
+    Distribution1D                 _hist;                 /**< Distribution that holds the histogram of the input image. */
+    Distribution1D                 _cum_dist;             /**< Distribution that holds the cumulative distribution of the input histogram. */
+    Lut                            _cd_lut;               /**< Holds the equalization lookup table. */
+    static constexpr uint32_t      nr_bins{ 256 };        /**< Histogram bins of the internal histograms. */
+    static constexpr uint32_t      max_range{ nr_bins - 1 }; /**< Histogram range of the internal histograms. */
+};
+}
+#endif /*__ARM_COMPUTE_NEEQUALIZEHISTOGRAM_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEErode.h b/arm_compute/runtime/NEON/functions/NEErode.h
new file mode 100644
index 0000000000..940ae18471
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEErode.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEERODE_H__
+#define __ARM_COMPUTE_NEERODE_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to execute erode. This function calls the following NEON kernels:
+ *
+ * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
+ * -# @ref NEErodeKernel
+ *
+ */
+class NEErode : public INESimpleFunction
+{
+public:
+    /** Initialise the kernel's inputs, output and border mode.
+     *
+     * @param[in, out] input                 First tensor input. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]     output                Output tensor. Data type supported: U8.
+     * @param[in]      border_mode           Border mode to use.
+     * @param[in]      constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
+};
+}
+#endif /*__ARM_COMPUTE_NEERODE_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEFastCorners.h b/arm_compute/runtime/NEON/functions/NEFastCorners.h
new file mode 100644
index 0000000000..d7c31750c5
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEFastCorners.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEFASTCORNERS_H__
+#define __ARM_COMPUTE_NEFASTCORNERS_H__
+
+#include "arm_compute/core/NEON/kernels/NEFastCornersKernel.h"
+#include "arm_compute/core/NEON/kernels/NEFillArrayKernel.h"
+#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
+#include "arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/Array.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/Tensor.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ITensor;
+using IImage = ITensor;
+
+/** Basic function to execute fast corners. This function calls the following NEON kernels:
+ *
+ * -# @ref NEFastCornersKernel
+ * -# @ref NENonMaximaSuppression3x3Kernel (executed if nonmax_suppression == true)
+ * -# @ref NEFillArrayKernel
+ *
+ */
+class NEFastCorners : public IFunction
+{
+public:
+    /** Constructor */
+    NEFastCorners();
+    /** Initialize the function's source, destination and border mode.
+     *
+     * @param[in, out] input                 Source image. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[in]      threshold             Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3.
+     * @param[in]      nonmax_suppression    If true, non-maximum suppression is applied to detected corners before being placed in the array.
+     * @param[out]     corners               Array of keypoints to store the results.
+     * @param[in]      border_mode           Strategy to use for borders.
+     * @param[in]      constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(IImage *input, float threshold, bool nonmax_suppression, KeyPointArray *corners,
+                   BorderMode border_mode, uint8_t constant_border_value = 0);
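+
+    /* A minimal usage sketch; src is a placeholder U8 image and corners a
+     * keypoint array preallocated with an assumed capacity:
+     *
+     *   KeyPointArray corners(10000);
+     *   NEFastCorners fast;
+     *   fast.configure(&src, 20.0f, true, &corners, BorderMode::UNDEFINED);
+     *   fast.run();
+     */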
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    NEFastCornersKernel             _fast_corners_kernel;
+    NEFillBorderKernel              _border_handler;
+    NENonMaximaSuppression3x3Kernel _nonmax_kernel;
+    NEFillArrayKernel               _fill_kernel;
+    Image                           _output;
+    Image                           _suppressed;
+    bool                            _non_max;
+};
+}
+#endif /*__ARM_COMPUTE_NEFASTCORNERS_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEFillBorder.h b/arm_compute/runtime/NEON/functions/NEFillBorder.h
new file mode 100644
index 0000000000..b6b7e77471
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEFillBorder.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEFILLBORDER_H__
+#define __ARM_COMPUTE_NEFILLBORDER_H__
+
+#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
+#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to run @ref NEFillBorderKernel */
+class NEFillBorder : public IFunction
+{
+public:
+    /** Initialize the function's source, destination and border_mode.
+     *
+     * @note This function fills the borders within the XY-planes.
+     *
+     * @param[in, out] input                 Source tensor. Data type supported: U8/QS8/S16/S32/F32
+     * @param[in]      border_width          Width of the tensor border in pixels.
+     * @param[in]      border_mode           Strategy to use for borders.
+     * @param[in]      constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(ITensor *input, unsigned int border_width, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue());
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    NEFillBorderKernel _border_handler; /**< Kernel to handle image borders */
+};
+}
+#endif /*__ARM_COMPUTE_NEFILLBORDER_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
new file mode 100644
index 0000000000..33ec4ef721
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEFULLYCONNECTEDLAYER_H__
+#define __ARM_COMPUTE_NEFULLYCONNECTEDLAYER_H__
+
+#include "arm_compute/runtime/IFunction.h"
+
+#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
+#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h"
+#include "arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h"
+#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
+#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h"
+#include "arm_compute/core/NEON/kernels/NETransposeKernel.h"
+#include "arm_compute/runtime/Tensor.h"
+
+namespace arm_compute
+{
+/** Basic function to reshape the weights of the Fully Connected layer with NEON. This function calls the following kernels:
+ *
+ * -# @ref NETransposeKernel (if @p transpose_weights is set to true)
+ * -# @ref NEGEMMTranspose1xWKernel (if @p is_batched_fc_layer is set to true)
+ *
+ * @note The fully connected layer accepts "weights" tensors only with 2 dimensions.
+ */
+class NEFullyConnectedLayerReshapeWeights : public IFunction
+{
+public:
+    /** Constructor */
+    NEFullyConnectedLayerReshapeWeights();
+    /** Set the input and output tensors.
+     *
+     * @param[in]  input               Weights tensor. The weights must be 2 dimensional. Data types supported: QS8/F32.
+     * @param[out] output              Destination tensor. Data type supported: Same as @p input.
+     * @param[in]  transpose_weights   True if the weights must be transposed.
+     * @param[in]  is_batched_fc_layer True if it is a batched fully connected layer.
+     */
+    void configure(const ITensor *input, ITensor *output, bool transpose_weights, bool is_batched_fc_layer);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    NETransposeKernel        _transpose_kernel;
+    NEGEMMTranspose1xWKernel _transpose1xW_kernel;
+    Tensor                   _transpose_output;
+    bool                     _transpose_weights;
+    bool                     _is_batched_fc_layer;
+};
+
+/** Basic function to compute a Fully Connected layer on NEON. This function calls the following NEON kernels:
+ * -# @ref NEIm2ColKernel (called when the input comes from a convolutional layer)
+ * -# @ref NEFullyConnectedLayerReshapeWeights (if @p are_weights_reshaped flag is set to false) (called once)
+ * -# @ref NEGEMMInterleave4x4Kernel (called if we have a multi-batch input)
+ * -# @ref NEGEMMMatrixMultiplyKernel
+ * -# @ref NEGEMMMatrixAccumulateBiasesKernel (if @p biases is not equal to nullptr)
+ *
+ * @note The fully connected layer accepts "weights" tensors only with 2 dimensions.
+ */
+class NEFullyConnectedLayer : public IFunction
+{
+public:
+    /** Constructor */
+    NEFullyConnectedLayer();
+    /** Set the input and output tensors.
+     *
+     * @param[in]  input                Source tensor. Data type supported: QS8/F32.
+     * @param[in]  weights              Weights tensor. The weights must be 2 dimensional. Data type supported: Same as @p input.
+     * @param[in]  biases               Bias tensor. Can be nullptr. Data type supported: Same as @p input.
+     * @param[out] output               Destination tensor. Data type supported: Same as @p input.
+     * @param[in]  transpose_weights    (Optional) Transpose the weights tensor if true. Defaults to true.
+     * @param[in]  are_weights_reshaped (Optional) True if the weights are already reshaped, so the reshape step is skipped. Defaults to false.
+     */
+    void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, bool transpose_weights = true, bool are_weights_reshaped = false);
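+
+    /* A minimal usage sketch with placeholder tensors; with the default flags
+     * the 2D weights are transposed internally and reshaped on configuration:
+     *
+     *   NEFullyConnectedLayer fc;
+     *   fc.configure(&src, &weights, &biases, &dst);
+     *   fc.run();
+     */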
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    void configure_fc_fc_wb(const ITensor *input, const ITensor *weights, ITensor *output);
+    void configure_fc_fc_nb(const ITensor *input, const ITensor *weights, ITensor *output);
+    void configure_conv_fc_wb(const ITensor *input, const ITensor *weights, ITensor *output);
+    void configure_conv_fc_nb(const ITensor *input, const ITensor *weights, ITensor *output);
+
+    NEIm2ColKernel                      _im2col_kernel;
+    NEFullyConnectedLayerReshapeWeights _reshape_weights_kernel;
+    NEGEMMInterleave4x4Kernel           _interleave4x4_kernel;
+    NEGEMMMatrixMultiplyKernel          _mm_kernel;
+    NEGEMMMatrixAccumulateBiasesKernel  _accumulate_biases_kernel;
+    Tensor                              _im2col_output;
+    Tensor                              _interleave4x4_output;
+    Tensor                              _reshape_weights_output;
+    bool                                _are_weights_reshaped;
+    bool                                _is_fc_after_conv;
+    bool                                _is_batched_fc_layer;
+    bool                                _accumulate_biases;
+};
+}
+#endif /* __ARM_COMPUTE_NEFULLYCONNECTEDLAYER_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEGEMM.h b/arm_compute/runtime/NEON/functions/NEGEMM.h
new file mode 100644
index 0000000000..a40aa910a5
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEGEMM.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEGEMM_H__
+#define __ARM_COMPUTE_NEGEMM_H__
+
+#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
+#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
+#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h"
+#include "arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h"
+#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/Tensor.h"
+
+namespace arm_compute
+{
+/** Basic function to execute GEMM on NEON. This function calls the following NEON kernels:
+ *
+ * -# @ref NEGEMMInterleave4x4Kernel (if the output tensor is a matrix)
+ * -# @ref NEGEMMTranspose1xWKernel (if the output tensor is a matrix)
+ * -# @ref NEGEMMMatrixMultiplyKernel
+ * -# @ref NEGEMMMatrixAdditionKernel (if c != nullptr and beta != 0.0)
+ *
+ */
+class NEGEMM : public IFunction
+{
+public:
+    /** Constructor */
+    NEGEMM();
+    /** Initialise the kernel's inputs and output.
+     *
+     * @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C].
+     * @note GEMM: The tensors a, b, c, d must have the same data type. You should not mix data types when calling this function.
+     *
+     * @param[in]  a     First input tensor (Matrix A or Vector A). Data type supported: QS8/F16/F32
+     * @param[in]  b     Second input tensor (Matrix B). Data type supported: same as @p a
+     * @param[in]  c     Third input tensor (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a
+     * @param[out] d     Output tensor. Data type supported: same as @p a
+     * @param[in]  alpha Weight of the matrix product
+     * @param[in]  beta  Weight of matrix C
+     */
+    void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *d, float alpha, float beta);
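+
+    /* A minimal usage sketch computing d = alpha * a * b + beta * c with
+     * placeholder tensors of matching data types:
+     *
+     *   NEGEMM gemm;
+     *   gemm.configure(&a, &b, &c, &d, 1.0f, 1.0f);
+     *   gemm.run();
+     */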
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    NEGEMMInterleave4x4Kernel  _interleave_kernel;
+    NEGEMMTranspose1xWKernel   _transpose_kernel;
+    NEGEMMMatrixMultiplyKernel _mm_kernel;
+    NEGEMMMatrixAdditionKernel _ma_kernel;
+    Tensor                     _tmp_a;
+    Tensor                     _tmp_b;
+    bool                       _run_vector_matrix_multiplication;
+    bool                       _run_addition;
+};
+}
+#endif /*__ARM_COMPUTE_NEGEMM_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h b/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h
new file mode 100644
index 0000000000..b911fd064f
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEGEMMINTERLEAVE4X4_H__
+#define __ARM_COMPUTE_NEGEMMINTERLEAVE4X4_H__
+
+#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to execute NEGEMMInterleave4x4Kernel. This function calls the following NEON kernel:
+ *
+ * -# @ref NEGEMMInterleave4x4Kernel
+ *
+ */
+class NEGEMMInterleave4x4 : public INESimpleFunction
+{
+public:
+    /** Initialise the kernel's inputs and output.
+     *
+     * @param[in]  input  First input tensor. Data types supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32
+     * @param[out] output Output tensor. Data type supported: same as @p input
+     */
+    void configure(const ITensor *input, ITensor *output);
+};
+}
+#endif /*__ARM_COMPUTE_NEGEMMINTERLEAVE4X4_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowp.h b/arm_compute/runtime/NEON/functions/NEGEMMLowp.h
new file mode 100644
index 0000000000..bfb1a494b8
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEGEMMLowp.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEGEMMLOWP_H__
+#define __ARM_COMPUTE_NEGEMMLOWP_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/Tensor.h"
+
+#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
+#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
+#include "arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h"
+#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to execute GEMMLowp on NEON. This function calls the following NEON kernels:
+ *
+ * -# @ref NEGEMMInterleave4x4Kernel
+ * -# @ref NEGEMMTranspose1xWKernel
+ * -# @ref NEGEMMLowpMatrixMultiplyKernel
+ *
+ */
+class NEGEMMLowp : public IFunction
+{
+public:
+    /** Constructor */
+    NEGEMMLowp();
+    /** Initialise the kernel's inputs and output.
+     *
+     * @note GEMM_LOWP: low precision GEMM kernel
+     *  This kernel performs the following computation:
+     *
+     *  -# Convert a values from uint8 to int32 and add a_offset to each of them.
+     *  -# Convert b values from uint8 to int32 and add b_offset to each of them.
+     *  -# Compute the int32 matrix product of the resulting a * b.
+     *  -# Add output_offset to each entry of the result.
+     *  -# Multiply each entry of the result by output_mult_int, shift it right by shift, and round to the nearest integer.
+     *  -# Clamp the resulting int32 values to the [0..255] range and cast to uint8.
+     *
+     * @param[in]  a               First input tensor (Matrix A). Data type supported: U8.
+     * @param[in]  b               Second input tensor (Matrix B). Data type supported: same as @p a
+     * @param[out] output          Output tensor. Data type supported: same as @p a.
+     * @param[in]  a_offset        Offset to be added to each element of the matrix A.
+     * @param[in]  b_offset        Offset to be added to each element of the matrix B.
+     * @param[in]  output_offset   Offset to be added to each element of the output matrix
+     * @param[in]  output_mult_int Value by which to multiply each element of the output matrix
+     * @param[in]  shift           Number of bits to shift right the result.
+     */
+    void configure(const ITensor *a, const ITensor *b, ITensor *output, int32_t a_offset, int32_t b_offset, int32_t output_offset, int32_t output_mult_int, int32_t shift);
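+
+    /* The per-entry arithmetic of the steps above, written as pseudocode
+     * (rounded_shift stands for the rounding right shift described in step 5):
+     *
+     *   acc       = sum_k (int32(a[i][k]) + a_offset) * (int32(b[k][j]) + b_offset)
+     *   acc       = (acc + output_offset) * output_mult_int
+     *   out[i][j] = uint8(clamp(rounded_shift(acc, shift), 0, 255))
+     */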
+ */ + void configure(const ITensor *a, const ITensor *b, ITensor *output, int32_t a_offset, int32_t b_offset, int32_t output_offset, int32_t output_mult_int, int32_t shift); + // Inherited methods overridden: + void run() override; + +private: + NEGEMMInterleave4x4Kernel _interleave_kernel; + NEGEMMTranspose1xWKernel _transpose_kernel; + NEGEMMLowpMatrixMultiplyKernel _mm_kernel; + Tensor _tmp_a; + Tensor _tmp_b; +}; +} +#endif /*__ARM_COMPUTE_NEGEMMLOWP_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h b/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h new file mode 100644 index 0000000000..447b8c9c70 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEGEMMTRANSPOSE1XW_H__ +#define __ARM_COMPUTE_NEGEMMTRANSPOSE1XW_H__ + +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +namespace arm_compute +{ +/** Basic function to execute NEGEMMTranspose1xWKernel. This function calls the following NEON kernels: + * + * -# @ref NEGEMMTranspose1xWKernel + * + */ +class NEGEMMTranspose1xW : public INESimpleFunction +{ +public: + /** Initialise the kernel's inputs, output + * + * @param[in] input First input tensor. Data type supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32/ + * @param[out] output Output tensor. Data type supported: same as @p input + */ + void configure(const ITensor *input, ITensor *output); +}; +} +#endif /*__ARM_COMPUTE_NEGEMMTRANSPOSE1XW_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEGaussian3x3.h b/arm_compute/runtime/NEON/functions/NEGaussian3x3.h new file mode 100644 index 0000000000..a237e6f0e5 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEGaussian3x3.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEGAUSSIAN3x3_H__
+#define __ARM_COMPUTE_NEGAUSSIAN3x3_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to execute gaussian filter 3x3. This function calls the following NEON kernels:
+ *
+ * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
+ * -# @ref NEGaussian3x3Kernel
+ *
+ */
+class NEGaussian3x3 : public INESimpleFunction
+{
+public:
+    /** Initialise the function's input, output and border mode.
+     *
+     * @param[in, out] input                 Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]     output                Destination tensor. Data type supported: U8.
+     * @param[in]      border_mode           Strategy to use for borders.
+     * @param[in]      constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
+};
+}
+#endif /*__ARM_COMPUTE_NEGAUSSIAN3x3_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEGaussian5x5.h b/arm_compute/runtime/NEON/functions/NEGaussian5x5.h
new file mode 100644
index 0000000000..699e42efb4
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEGaussian5x5.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEGAUSSIAN5x5_H__
+#define __ARM_COMPUTE_NEGAUSSIAN5x5_H__
+
+#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
+#include "arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/Tensor.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to execute gaussian filter 5x5. This function calls the following NEON kernels:
+ *
+ * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
+ * -# @ref NEGaussian5x5HorKernel
+ * -# @ref NEGaussian5x5VertKernel
+ *
+ */
+class NEGaussian5x5 : public IFunction
+{
+public:
+    /** Default constructor
+     */
+    NEGaussian5x5();
+    /** Initialise the function's input, output and border mode.
+     *
+     * @param[in, out] input                 Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]     output                Destination tensor. Data type supported: U8.
+     * @param[in]      border_mode           Strategy to use for borders.
+     * @param[in]      constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
+
+    // Inherited methods overridden:
+    void run() override;
+
+protected:
+    NEGaussian5x5HorKernel  _kernel_hor;     /**< kernel for horizontal pass */
+    NEGaussian5x5VertKernel _kernel_vert;    /**< kernel for vertical pass */
+    Tensor                  _tmp;            /**< temporary buffer for output of horizontal pass */
+    NEFillBorderKernel      _border_handler; /**< kernel to handle tensor borders */
+};
+}
+#endif /*__ARM_COMPUTE_NEGAUSSIAN5x5_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEGaussianPyramid.h b/arm_compute/runtime/NEON/functions/NEGaussianPyramid.h
new file mode 100644
index 0000000000..5f0a67ea05
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEGaussianPyramid.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEGAUSSIANPYRAMID_H__
+#define __ARM_COMPUTE_NEGAUSSIANPYRAMID_H__
+
+#include "arm_compute/core/IPyramid.h"
+#include "arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h"
+#include "arm_compute/core/NEON/kernels/NEScaleKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h"
+#include "arm_compute/runtime/Pyramid.h"
+#include "arm_compute/runtime/Tensor.h"
+
+#include <cstdint>
+#include <memory>
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Common interface for all Gaussian pyramid functions */
+class NEGaussianPyramid : public IFunction
+{
+public:
+    /** Default constructor */
+    NEGaussianPyramid();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEGaussianPyramid(const NEGaussianPyramid &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEGaussianPyramid &operator=(const NEGaussianPyramid &) = delete;
+    /** Allow instances of this class to be moved */
+    NEGaussianPyramid(NEGaussianPyramid &&) = default;
+    /** Allow instances of this class to be moved */
+    NEGaussianPyramid &operator=(NEGaussianPyramid &&) = default;
+    /** Default destructor */
+    virtual ~NEGaussianPyramid() = default;
+
+    /** Initialise the function's source, destinations and border mode.
+     *
+     * @param[in]  input                 Source tensor. Data type supported: U8.
+     * @param[out] pyramid               Destination pyramid tensors. Data type supported at each level: U8.
+     * @param[in]  border_mode           Border mode to use.
+     * @param[in]  constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     *
+     */
+    virtual void configure(const ITensor *input, IPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) = 0;
+
+protected:
+    const ITensor *_input;
+    IPyramid      *_pyramid;
+    Pyramid        _tmp;
+};
+
+/** Basic function to execute gaussian pyramid with HALF scale factor. This function calls the following NEON kernels:
+ *
+ * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
+ * -# @ref NEGaussianPyramidHorKernel
+ * -# @ref NEGaussianPyramidVertKernel
+ *
+ */
+class NEGaussianPyramidHalf : public NEGaussianPyramid
+{
+public:
+    /** Constructor */
+    NEGaussianPyramidHalf();
+
+    // Inherited methods overridden:
+    void configure(const ITensor *input, IPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override;
+    void run() override;
+
+private:
+    std::unique_ptr<NEFillBorderKernel[]>          _border_handler;
+    std::unique_ptr<NEGaussianPyramidHorKernel[]>  _horizontal_reduction;
+    std::unique_ptr<NEGaussianPyramidVertKernel[]> _vertical_reduction;
+};
+
+/** Basic function to execute gaussian pyramid with ORB scale factor. This function calls the following NEON kernels and functions:
+ *
+ * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
+ * -# @ref NEGaussian5x5
+ * -# @ref NEScaleKernel
+ *
+ */
+class NEGaussianPyramidOrb : public NEGaussianPyramid
+{
+public:
+    /** Constructor */
+    NEGaussianPyramidOrb();
+
+    // Inherited methods overridden:
+    void configure(const ITensor *input, IPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override;
+    void run() override;
+
+private:
+    std::unique_ptr<Image[]>         _offsets;
+    std::unique_ptr<NEGaussian5x5[]> _gaus5x5;
+    std::unique_ptr<NEScaleKernel[]> _scale_nearest;
+};
+}
+#endif /*__ARM_COMPUTE_NEGAUSSIANPYRAMID_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEHOGDescriptor.h b/arm_compute/runtime/NEON/functions/NEHOGDescriptor.h
new file mode 100644
index 0000000000..b7b4909060
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEHOGDescriptor.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEHOGDESCRIPTOR_H__
+#define __ARM_COMPUTE_NEHOGDESCRIPTOR_H__
+
+#include "arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/NEON/functions/NEHOGGradient.h"
+#include "arm_compute/runtime/Tensor.h"
+
+namespace arm_compute
+{
+class IHOG;
+/** Basic function to calculate HOG descriptor. This function calls the following NEON kernels:
+ *
+ * -# @ref NEHOGGradient
+ * -# @ref NEHOGOrientationBinningKernel
+ * -# @ref NEHOGBlockNormalizationKernel
+ *
+ */
+class NEHOGDescriptor : public IFunction
+{
+public:
+    /** Default constructor */
+    NEHOGDescriptor();
+    /** Initialise the function's source, destination, HOG data-object and border mode
+     *
+     * @param[in, out] input                 Input tensor. Data type supported: U8
+     *                                       (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]     output                Output tensor which stores the HOG descriptor. Data type supported: F32. The number of channels is equal to the number of histogram bins per block
+     * @param[in]      hog                   HOG data object which describes the HOG descriptor
+     * @param[in]      border_mode           Border mode to use.
+     * @param[in]      constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
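+     *
+     * A minimal usage sketch (illustrative only; the tensor and HOG object setup and
+     * allocation are elided, and the variable names are hypothetical):
+     * @code
+     * NEHOGDescriptor descriptor;
+     * descriptor.configure(&input, &output, &hog, BorderMode::CONSTANT, 0);
+     * descriptor.run();
+     * @endcode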
+     */
+    void configure(ITensor *input, ITensor *output, const IHOG *hog, BorderMode border_mode, uint8_t constant_border_value = 0);
+
+    // Inherited method overridden:
+    void run() override;
+
+private:
+    NEHOGGradient                 _gradient;
+    NEHOGOrientationBinningKernel _orient_bin;
+    NEHOGBlockNormalizationKernel _block_norm;
+    Tensor                        _mag;
+    Tensor                        _phase;
+    Tensor                        _hog_space;
+};
+}
+
+#endif /* __ARM_COMPUTE_NEHOGDESCRIPTOR_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEHOGDetector.h b/arm_compute/runtime/NEON/functions/NEHOGDetector.h
new file mode 100644
index 0000000000..98b8a89bc1
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEHOGDetector.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEHOGDETECTOR_H__
+#define __ARM_COMPUTE_NEHOGDETECTOR_H__
+
+#include "arm_compute/core/IHOG.h"
+#include "arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h"
+#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+
+namespace arm_compute
+{
+/** Basic function to execute HOG detector based on linear SVM. This function calls the following NEON kernel:
+ *
+ * -# @ref NEHOGDetectorKernel
+ *
+ */
+class NEHOGDetector : public INESimpleFunction
+{
+public:
+    /** Initialise the kernel's input, output, HOG data object, detection window stride, threshold and index class
+     *
+     * @attention The function does not reset the number of values in @ref IDetectionWindowArray, so it is the caller's responsibility to clear it.
+     *
+     * @param[in]  input                   Input tensor. It is the output of @ref NEHOGDescriptor. Data type supported: F32
+     * @param[in]  hog                     HOG data-object that describes the HOG descriptor
+     * @param[out] detection_windows       Array of @ref DetectionWindow used to store the detected objects
+     * @param[in]  detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions.
+     *                                     It must be a multiple of the block stride stored in hog
+     * @param[in]  threshold               (Optional) Threshold for the distance between features and SVM classifying plane
+     * @param[in]  idx_class               (Optional) Index of the class used for evaluating which class the detection window belongs to
+     */
+    void configure(const ITensor *input, const IHOG *hog, IDetectionWindowArray *detection_windows, const Size2D &detection_window_stride, float threshold = 0.0f, size_t idx_class = 0);
+};
+}
+
+#endif /* __ARM_COMPUTE_NEHOGDETECTOR_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEHOGGradient.h b/arm_compute/runtime/NEON/functions/NEHOGGradient.h
new file mode 100644
index 0000000000..dd2d99adfe
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEHOGGradient.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEHOGGRADIENT_H__
+#define __ARM_COMPUTE_NEHOGGRADIENT_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/NEON/functions/NEDerivative.h"
+#include "arm_compute/runtime/Tensor.h"
+
+#include <cstdint>
+#include <memory>
+
+namespace arm_compute
+{
+class ITensor;
+/** Basic function to calculate the gradient for HOG. This function calls the following NEON kernels:
+ *
+ * -# @ref NEDerivative
+ * -# NEMagnitudePhaseKernel
+ *
+ */
+class NEHOGGradient : public IFunction
+{
+public:
+    /** Default constructor */
+    NEHOGGradient();
+    /** Initialise the function's source, destinations, phase type and border mode
+     *
+     * @param[in, out] input                 Input tensor. Data type supported: U8.
+     *                                       (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]     output_magnitude      Output tensor (magnitude). Data type supported: U16.
+     * @param[out]     output_phase          Output tensor (phase). Data type supported: U8
+     * @param[in]      phase_type            Type of @ref PhaseType
+     * @param[in]      border_mode           Border mode to use
+     * @param[in]      constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
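+     *
+     * A minimal usage sketch (illustrative only; tensor setup and allocation are elided,
+     * and the variable names are hypothetical):
+     * @code
+     * NEHOGGradient gradient;
+     * gradient.configure(&input, &magnitude, &phase, PhaseType::UNSIGNED, BorderMode::UNDEFINED);
+     * gradient.run();
+     * @endcode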
+     */
+    void configure(ITensor *input, ITensor *output_magnitude, ITensor *output_phase, PhaseType phase_type, BorderMode border_mode, uint8_t constant_border_value = 0);
+
+    // Inherited method overridden:
+    void run() override;
+
+private:
+    NEDerivative               _derivative;
+    std::unique_ptr<INEKernel> _mag_phase;
+    Tensor                     _gx;
+    Tensor                     _gy;
+};
+}
+#endif /*__ARM_COMPUTE_NEHOGGRADIENT_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h b/arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h
new file mode 100644
index 0000000000..2d07e6435f
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEHOGMULTIDETECTION_H__
+#define __ARM_COMPUTE_NEHOGMULTIDETECTION_H__
+
+#include "arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h"
+#include "arm_compute/core/IArray.h"
+#include "arm_compute/core/IMultiHOG.h"
+#include "arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/NEON/functions/NEHOGDetector.h"
+#include "arm_compute/runtime/NEON/functions/NEHOGGradient.h"
+#include "arm_compute/runtime/Tensor.h"
+
+namespace arm_compute
+{
+/** Basic function to detect multiple objects (or the same object at different scales) on the same input image using HOG. This function calls the following NEON kernels:
+ *
+ * -# @ref NEHOGGradient
+ * -# @ref NEHOGOrientationBinningKernel
+ * -# @ref NEHOGBlockNormalizationKernel
+ * -# @ref NEHOGDetector
+ * -# @ref CPPDetectionWindowNonMaximaSuppressionKernel (executed if non_maxima_suppression == true)
+ *
+ * @note This implementation works if all the HOG data-objects within the IMultiHOG container have the same:
+ * -# Phase type
+ * -# Normalization type
+ * -# L2 hysteresis threshold if the normalization type is L2HYS_NORM
+ *
+ */
+class NEHOGMultiDetection : public IFunction
+{
+public:
+    /** Default constructor */
+    NEHOGMultiDetection();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEHOGMultiDetection(const NEHOGMultiDetection &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEHOGMultiDetection &operator=(const NEHOGMultiDetection &) = delete;
+    /** Initialise the function's source, destination, detection window strides, border mode, threshold and non-maxima suppression
+     *
+     * @param[in, out] input                    Input tensor. Data type supported: U8
+     *                                          (Written to only for @p border_mode != UNDEFINED)
+     * @param[in]      multi_hog                Container of multiple HOG data objects. Each HOG data object describes one HOG model to detect.
+     *                                          This container should store the HOG data-objects in descending or ascending cell_size width order.
+     *                                          This helps to determine whether the HOG descriptor computation can be skipped for some HOG data-objects
+     * @param[out]     detection_windows        Array of @ref DetectionWindow used for locating the detected objects
+     * @param[in]      detection_window_strides Array of @ref Size2D used to specify the distance in pixels between 2 consecutive detection windows in x and y directions for each HOG data-object
+     *                                          The dimension of this array must be the same as multi_hog->num_models()
+     *                                          The i-th detection_window_stride of this array must be a multiple of the block_stride stored in the i-th multi_hog array
+     * @param[in]      border_mode              Border mode to use.
+     * @param[in]      constant_border_value    (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     * @param[in]      threshold                (Optional) Threshold for the distance between features and SVM classifying plane
+     * @param[in]      non_maxima_suppression   (Optional) Flag to specify whether the non-maxima suppression is required or not.
+     *                                          True if the non-maxima suppression stage has to be computed
+     * @param[in]      min_distance             (Optional) Radial Euclidean distance to use for the non-maxima suppression stage
+     *
+     */
+    void configure(ITensor *input, const IMultiHOG *multi_hog, IDetectionWindowArray *detection_windows, const ISize2DArray *detection_window_strides, BorderMode border_mode,
+                   uint8_t constant_border_value = 0,
+                   float threshold = 0.0f, bool non_maxima_suppression = false, float min_distance = 1.0f);
+
+    // Inherited method overridden:
+    void run() override;
+
+private:
+    NEHOGGradient                                                 _gradient_kernel;
+    std::unique_ptr<NEHOGOrientationBinningKernel[]>              _orient_bin_kernel;
+    std::unique_ptr<NEHOGBlockNormalizationKernel[]>              _block_norm_kernel;
+    std::unique_ptr<NEHOGDetector[]>                              _hog_detect_kernel;
+    std::unique_ptr<CPPDetectionWindowNonMaximaSuppressionKernel> _non_maxima_kernel;
+    std::unique_ptr<Tensor[]>                                     _hog_space;
+    std::unique_ptr<Tensor[]>                                     _hog_norm_space;
+    IDetectionWindowArray                                        *_detection_windows;
+    Tensor                                                        _mag;
+    Tensor                                                        _phase;
+    bool                                                          _non_maxima_suppression;
+    size_t                                                        _num_orient_bin_kernel;
+    size_t                                                        _num_block_norm_kernel;
+    size_t                                                        _num_hog_detect_kernel;
+};
+}
+
+#endif /* __ARM_COMPUTE_NEHOGMULTIDETECTION_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEHarrisCorners.h b/arm_compute/runtime/NEON/functions/NEHarrisCorners.h
new file mode 100644
index 0000000000..a709871153
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEHarrisCorners.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEHARRISCORNERS_H__
+#define __ARM_COMPUTE_NEHARRISCORNERS_H__
+
+#include "arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h"
+#include "arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h"
+#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
+#include "arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/Array.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h"
+#include "arm_compute/runtime/Tensor.h"
+
+#include <cstdint>
+#include <memory>
+
+namespace arm_compute
+{
+class ITensor;
+using IImage = ITensor;
+
+/** Basic function to execute harris corners detection. This function calls the following NEON kernels and functions:
+ *
+ * -# @ref NESobel3x3 (if gradient_size == 3) or
+ * @ref NESobel5x5 (if gradient_size == 5) or
+ * @ref NESobel7x7 (if gradient_size == 7) + * -# @ref NEFillBorderKernel + * -# NEHarrisScoreKernel<3> (if block_size == 3) or
+ * NEHarrisScoreKernel<5> (if block_size == 5) or
+ *    NEHarrisScoreKernel<7> (if block_size == 7)
+ * -# @ref NENonMaximaSuppression3x3
+ * -# @ref CPPCornerCandidatesKernel
+ * -# @ref CPPSortEuclideanDistanceKernel
+ *
+ */
+class NEHarrisCorners : public IFunction
+{
+public:
+    /** Constructor
+     *
+     * Initialize _sobel, _harris_score and _corners_list to nullptr.
+     */
+    NEHarrisCorners();
+    /** Initialize the function's source, destination, parameters and border mode.
+     *
+     * @param[in, out] input                 Source image. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[in]      threshold             Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel).
+     * @param[in]      min_dist              Radial Euclidean distance for the euclidean distance stage
+     * @param[in]      sensitivity           Sensitivity threshold k from the Harris-Stephens equation
+     * @param[in]      gradient_size         The gradient window size to use on the input. The implementation supports 3, 5, and 7
+     * @param[in]      block_size            The block window size used to compute the Harris Corner score. The implementation supports 3, 5, and 7.
+     * @param[out]     corners               Array of keypoints to store the results.
+     * @param[in]      border_mode           Border mode to use
+     * @param[in]      constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     * @param[in]      use_fp16              (Optional) If true the FP16 kernels will be used. If false F32 kernels are used.
+     */
+    void configure(IImage *input, float threshold, float min_dist, float sensitivity,
+                   int32_t gradient_size, int32_t block_size, KeyPointArray *corners,
+                   BorderMode border_mode, uint8_t constant_border_value = 0, bool use_fp16 = false);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    std::unique_ptr<IFunction>            _sobel;                 /**< Sobel function */
+    std::unique_ptr<INEHarrisScoreKernel> _harris_score;          /**< Harris score kernel */
+    NENonMaximaSuppression3x3             _non_max_suppr;         /**< Non-maxima suppression function */
+    CPPCornerCandidatesKernel             _candidates;            /**< Sort kernel */
+    CPPSortEuclideanDistanceKernel        _sort_euclidean;        /**< Euclidean distance kernel */
+    NEFillBorderKernel                    _border_gx;             /**< Border handler before running harris score */
+    NEFillBorderKernel                    _border_gy;             /**< Border handler before running harris score */
+    Image                                 _gx;                    /**< Source image - Gx component */
+    Image                                 _gy;                    /**< Source image - Gy component */
+    Image                                 _score;                 /**< Source image - Harris score */
+    Image                                 _nonmax;                /**< Source image - Non-Maxima suppressed image */
+    std::unique_ptr<InternalKeypoint[]>   _corners_list;          /**< Array of InternalKeypoint. It stores the potential corner candidates */
+    int32_t                               _num_corner_candidates; /**< Number of potential corner candidates */
+};
+}
+#endif /*__ARM_COMPUTE_NEHARRISCORNERS_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEHistogram.h b/arm_compute/runtime/NEON/functions/NEHistogram.h
new file mode 100644
index 0000000000..c24510dcb3
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEHistogram.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEHISTOGRAM_H__
+#define __ARM_COMPUTE_NEHISTOGRAM_H__
+
+#include "arm_compute/core/NEON/kernels/NEHistogramKernel.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+
+namespace arm_compute
+{
+class IDistribution1D;
+
+/** Basic function to run @ref NEHistogramKernel. */
+class NEHistogram : public IFunction
+{
+public:
+    /** Default Constructor. */
+    NEHistogram();
+    /** Initialise the kernel's inputs.
+     *
+     * @param[in]  input  Input image. Data type supported: U8.
+     * @param[out] output Output distribution.
+     */
+    void configure(const IImage *input, IDistribution1D *output);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    NEHistogramKernel           _histogram_kernel;
+    std::unique_ptr<uint32_t[]> _local_hist;
+    std::unique_ptr<uint32_t[]> _window_lut;
+    size_t                      _local_hist_size;
+    /** 256 possible pixel values as we handle only U8 images */
+    static constexpr unsigned int window_lut_default_size = 256;
+};
+}
+#endif /*__ARM_COMPUTE_NEHISTOGRAM_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEIntegralImage.h b/arm_compute/runtime/NEON/functions/NEIntegralImage.h
new file mode 100644
index 0000000000..6d7dd697e8
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEIntegralImage.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEINTEGRALIMAGE_H__
+#define __ARM_COMPUTE_NEINTEGRALIMAGE_H__
+
+#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to run a @ref NEIntegralImageKernel */
+class NEIntegralImage : public INESimpleFunction
+{
+public:
+    /** Initialise the function's source and destination.
+     *
+     * @param[in]  input  Source tensor. Data type supported: U8.
+     * @param[out] output Destination tensor. Data type supported: U32.
+     */
+    void configure(const ITensor *input, ITensor *output);
+};
+}
+#endif /*__ARM_COMPUTE_NEINTEGRALIMAGE_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NELaplacianPyramid.h b/arm_compute/runtime/NEON/functions/NELaplacianPyramid.h
new file mode 100644
index 0000000000..991ae7c293
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NELaplacianPyramid.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NELAPLACIANPYRAMID_H__
+#define __ARM_COMPUTE_NELAPLACIANPYRAMID_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h"
+#include "arm_compute/runtime/NEON/functions/NEDepthConvert.h"
+#include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h"
+#include "arm_compute/runtime/NEON/functions/NEGaussianPyramid.h"
+#include "arm_compute/runtime/Pyramid.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to execute laplacian pyramid. This function calls the following NEON kernels and functions:
+ *
+ * -# @ref NEGaussianPyramidHalf
+ * -# @ref NEGaussian5x5
+ * -# @ref NEArithmeticSubtraction
+ *
+ * First a Gaussian pyramid is created. Then, for each level i, the corresponding tensor I(i) is blurred with the Gaussian 5x5 filter, and the
+ * difference between the two tensors is the corresponding level L(i) of the Laplacian pyramid:
+ *       L(i) = I(i) - Gaussian5x5(I(i))
+ * Level 0 always has the same first two dimensions as the input tensor.
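+ *
+ * A minimal usage sketch (illustrative only; pyramid and tensor setup and allocation are
+ * elided, and the variable names are hypothetical):
+ * @code
+ * NELaplacianPyramid lap_pyr;
+ * lap_pyr.configure(&src, &pyramid, &lowest_res, BorderMode::REPLICATE, 0);
+ * lap_pyr.run();
+ * @endcode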
+*/
+class NELaplacianPyramid : public IFunction
+{
+public:
+    /** Constructor */
+    NELaplacianPyramid();
+    /** Initialise the function's source, destinations and border mode.
+     *
+     * @param[in]  input                 Source tensor. Data type supported: U8.
+     * @param[out] pyramid               Destination pyramid tensors. Data type supported at each level: S16.
+     * @param[out] output                The lowest resolution tensor necessary to reconstruct the input tensor from the pyramid. Data type supported: S16.
+     *                                   The first two dimensions of this tensor must match the first two dimensions of the tensor in the last level of the pyramid, that is:
+     *                                   out.width = in.width() / pow(2,pyramid_levels-1) and out.height = in.height() / pow(2,pyramid_levels-1)
+     * @param[in]  border_mode           Border mode to use.
+     * @param[in]  constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     *
+     */
+    void configure(const ITensor *input, IPyramid *pyramid, ITensor *output, BorderMode border_mode, uint8_t constant_border_value);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    size_t                                     _num_levels;
+    NEGaussianPyramidHalf                      _gaussian_pyr_function;
+    std::unique_ptr<NEGaussian5x5[]>           _convf;
+    std::unique_ptr<NEArithmeticSubtraction[]> _subf;
+    Pyramid                                    _gauss_pyr;
+    Pyramid                                    _conv_pyr;
+    NEDepthConvert                             _depth_function;
+};
+}
+#endif /*__ARM_COMPUTE_NELAPLACIANPYRAMID_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h b/arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h
new file mode 100644
index 0000000000..4139733499
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NELAPLACIANRECONSTRUCT_H__
+#define __ARM_COMPUTE_NELAPLACIANRECONSTRUCT_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
+#include "arm_compute/runtime/NEON/functions/NEDepthConvert.h"
+#include "arm_compute/runtime/NEON/functions/NEScale.h"
+#include "arm_compute/runtime/Pyramid.h"
+
+#include <cstdint>
+#include <memory>
+
+namespace arm_compute
+{
+class ITensor;
+using IImage = ITensor;
+
+/** Basic function to execute laplacian reconstruction. This function calls the following NEON kernels and functions:
+ *
+ * -# @ref NEArithmeticAddition
+ * -# @ref NEScale
+ * -# @ref NEDepthConvert
+ *
+ * This function reconstructs the original image from a Laplacian Image Pyramid.
+ *
+ * The input image is added to the last level of the Laplacian pyramid L(n-1), and the resulting image is upsampled to the
+ * resolution of the next pyramid level.
+ *
+ * I(n-2) = upsample(input + L(n-1))
+ *
+ * For each pyramid level i, except i=0 and i=n-1:
+ *       I(i-1) = upsample(I(i) + L(i))
+ *
+ * output = I(0) + L(0)
+*/
+class NELaplacianReconstruct : public IFunction
+{
+public:
+    /** Constructor */
+    NELaplacianReconstruct();
+    /** Initialise the function's source, destinations and border mode.
+     *
+     * The Output image must have the same size as the first level of the pyramid.
+     * The Input image must have the same size as the last level of the pyramid.
+     *
+     * The idea is to reconstruct the original hi-res image from a low-res representation of it and the laplacian pyramid.
+     *
+     * @param[in]  pyramid               Laplacian pyramid tensors. Data type supported at each level: S16.
+     * @param[in]  input                 Source tensor. Data type supported: S16.
+     * @param[out] output                Output tensor. Data type supported: U8.
+     * @param[in]  border_mode           Border mode to use for the convolution.
+     * @param[in]  constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     *
+     */
+    void configure(const IPyramid *pyramid, const ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    Pyramid                                 _tmp_pyr;
+    std::unique_ptr<NEArithmeticAddition[]> _addf;
+    std::unique_ptr<NEScale[]>              _scalef;
+    NEDepthConvert                          _depthf;
+};
+}
+#endif /*__ARM_COMPUTE_NELAPLACIANRECONSTRUCT_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h
new file mode 100644
index 0000000000..1b2b2ee3cf
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NELOCALLYCONNECTEDLAYER_H__
+#define __ARM_COMPUTE_NELOCALLYCONNECTEDLAYER_H__
+
+#include "arm_compute/runtime/IFunction.h"
+
+#include "arm_compute/core/NEON/kernels/NECol2ImKernel.h"
+#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h"
+#include "arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h"
+#include "arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/Tensor.h"
+
+namespace arm_compute
+{
+class INETensor;
+
+/** Basic function to compute the locally connected layer. This function calls the following NEON kernels:
+ *
+ * -# @ref NEWeightsReshapeKernel (executed only once for each configuration)
+ * -# @ref NEIm2ColKernel
+ * -# @ref NELocallyConnectedMatrixMultiplyKernel
+ * -# @ref NECol2ImKernel
+ */
+class NELocallyConnectedLayer : public IFunction
+{
+public:
+    /** Default constructor */
+    NELocallyConnectedLayer();
+    /** Set the input and output tensors.
+     *
+     * @param[in]  input     Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
+     *                       while every optional dimension from 4 and above represent a batch of inputs.
+     *                       Data types supported: F32.
+     * @param[in]  weights   Weights tensor. Weights are 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches]. Data type supported: Same as @p input.
+     * @param[in]  biases    Biases tensor. Shared biases supported. Biases are 2D tensor with dimensions [OFM, num_patches]. Data type supported: Same as @p input.
+     * @param[out] output    Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
+     *                       Data types supported: Same as @p input.
+     * @param[in]  conv_info Contains padding and stride information described in @ref PadStrideInfo.
+     */
+    void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    NEIm2ColKernel                         _input_im2col_kernel;
+    NEWeightsReshapeKernel                 _weights_reshape_kernel;
+    NELocallyConnectedMatrixMultiplyKernel _mm_kernel;
+    NECol2ImKernel                         _output_col2im_kernel;
+    Tensor                                 _input_im2col_reshaped;
+    Tensor                                 _weights_reshaped;
+    Tensor                                 _gemm_output;
+    bool                                   _is_first_run;
+};
+}
+#endif /* __ARM_COMPUTE_NELOCALLYCONNECTEDLAYER_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEMagnitude.h b/arm_compute/runtime/NEON/functions/NEMagnitude.h
new file mode 100644
index 0000000000..6c1f988ef0
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEMagnitude.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEMAGNITUDE_H__
+#define __ARM_COMPUTE_NEMAGNITUDE_H__
+
+#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to run NEMagnitudePhaseKernel */
+class NEMagnitude : public INESimpleFunction
+{
+public:
+    /** Initialise the kernel's inputs.
+     *
+     * @param[in]  input1   First tensor input. Data type supported: S16.
+     * @param[in]  input2   Second tensor input. Data type supported: S16.
+     * @param[out] output   Output tensor. Data type supported: S16.
+     * @param[in]  use_fp16 (Optional) If true the FP16 kernels will be used. If false F32 kernels are used.
+     */
+    void configure(const ITensor *input1, const ITensor *input2, ITensor *output, bool use_fp16 = false);
+};
+}
+#endif /*__ARM_COMPUTE_NEMAGNITUDE_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEMeanStdDev.h b/arm_compute/runtime/NEON/functions/NEMeanStdDev.h
new file mode 100644
index 0000000000..3770b2a270
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEMeanStdDev.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEMEANSTDDEV_H__
+#define __ARM_COMPUTE_NEMEANSTDDEV_H__
+
+#include "arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+/** Basic function to execute mean and std deviation. This function calls the following NEON kernels:
+ *
+ * -# @ref NEMeanStdDevKernel
+ *
+ */
+class NEMeanStdDev : public IFunction
+{
+public:
+    /** Default Constructor. */
+    NEMeanStdDev();
+    /** Initialise the kernel's inputs and outputs.
+     *
+     * @param[in]  input  Input image. Data type supported: U8.
+     * @param[out] mean   Output average pixel value.
+     * @param[out] stddev (Optional) Output standard deviation of pixel values.
+     */
+    void configure(const IImage *input, float *mean, float *stddev = nullptr);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    NEMeanStdDevKernel _mean_stddev_kernel; /**< Kernel that performs the mean and standard deviation calculation. */
+    uint64_t           _global_sum;         /**< Variable that holds the global sum among calls in order to ease reduction */
+    uint64_t           _global_sum_squared; /**< Variable that holds the global sum of squared values among calls in order to ease reduction */
+};
+}
+#endif /*__ARM_COMPUTE_NEMEANSTDDEV_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEMedian3x3.h b/arm_compute/runtime/NEON/functions/NEMedian3x3.h
new file mode 100644
index 0000000000..a3df687a35
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEMedian3x3.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEMEDIAN3x3_H__
+#define __ARM_COMPUTE_NEMEDIAN3x3_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to execute median filter. This function calls the following NEON kernels:
+ *
+ * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
+ * -# @ref NEMedian3x3Kernel
+ *
+ */
+class NEMedian3x3 : public INESimpleFunction
+{
+public:
+    /** Initialise the function's source, destinations and border mode.
+     *
+     * @param[in, out] input                 Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]     output                Destination tensor. Data type supported: U8.
+     * @param[in]      border_mode           Border mode to use for the convolution.
+     * @param[in]      constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     *
+     */
+    void configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
+};
+}
+#endif /*__ARM_COMPUTE_NEMEDIAN3x3_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEMinMaxLocation.h b/arm_compute/runtime/NEON/functions/NEMinMaxLocation.h
new file mode 100644
index 0000000000..82e75ee48b
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEMinMaxLocation.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEMINMAXLOCATION_H__
+#define __ARM_COMPUTE_NEMINMAXLOCATION_H__
+
+#include "arm_compute/core/IArray.h"
+#include "arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h"
+#include "arm_compute/runtime/Array.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ITensor;
+using IImage = ITensor;
+
+/** Basic function to execute min and max location. This function calls the following NEON kernels:
+ *
+ * -# NEMinMaxKernel
+ * -# NEMinMaxLocationKernel
+ */
+class NEMinMaxLocation : public IFunction
+{
+public:
+    /** Constructor */
+    NEMinMaxLocation();
+    /** Initialise the kernel's inputs and outputs.
+     *
+     * @param[in]  input     Input image. Data types supported: U8/S16.
+     * @param[out] min       Minimum value of image.
+     * @param[out] max       Maximum value of image.
+     * @param[out] min_loc   (Optional) Array of minimum value locations.
+     * @param[out] max_loc   (Optional) Array of maximum value locations.
+     * @param[out] min_count (Optional) Number of minimum value encounters.
+     * @param[out] max_count (Optional) Number of maximum value encounters.
+     */
+    void configure(const IImage *input, int32_t *min, int32_t *max,
+                   ICoordinates2DArray *min_loc = nullptr, ICoordinates2DArray *max_loc = nullptr,
+                   uint32_t *min_count = nullptr, uint32_t *max_count = nullptr);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    NEMinMaxKernel         _min_max;     /**< Kernel that performs min/max */
+    NEMinMaxLocationKernel _min_max_loc; /**< Kernel that extracts min/max locations */
+};
+}
+#endif /*__ARM_COMPUTE_NEMINMAXLOCATION_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NENonLinearFilter.h b/arm_compute/runtime/NEON/functions/NENonLinearFilter.h
new file mode 100644
index 0000000000..d8a9eaebfb
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NENonLinearFilter.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NENONLINEARFILTER_H__
+#define __ARM_COMPUTE_NENONLINEARFILTER_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to execute non-linear filter. This function calls the following NEON kernels:
+ *
+ * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
+ * -# @ref NENonLinearFilterKernel
+ *
+ * @note Supported mask dimensions: squares of sizes 3 and 5
+ */
+class NENonLinearFilter : public INESimpleFunction
+{
+public:
+    /** Initialize the function's source, destination, filter parameters and border mode.
+     *
+     * @param[in, out] input                 Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]     output                Destination tensor. Data type supported: U8.
+     * @param[in]      function              Non-linear function to perform.
+     * @param[in]      mask_size             Mask size. Supported sizes: 3, 5.
+     * @param[in]      pattern               Mask pattern.
+     * @param[in]      mask                  The given mask. Used only if @p pattern is set to PATTERN_OTHER.
+     * @param[in]      border_mode           Strategy to use for borders.
+     * @param[in]      constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(ITensor *input, ITensor *output, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, BorderMode border_mode,
+                   uint8_t constant_border_value = 0);
+};
+}
+#endif /*__ARM_COMPUTE_NENONLINEARFILTER_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h b/arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h
new file mode 100644
index 0000000000..c87d722878
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NENONMAXIMASUPPRESSION3X3_H__
+#define __ARM_COMPUTE_NENONMAXIMASUPPRESSION3X3_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to execute non-maxima suppression over a 3x3 window. This function calls the following NEON kernels:
+ *
+ * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
+ * -# @ref NENonMaximaSuppression3x3Kernel
+ *
+ */
+class NENonMaximaSuppression3x3 : public INESimpleFunction
+{
+public:
+    /** Initialise the function's source, destination and border mode.
+     *
+     * @note The implementation supports just 2 border modes: UNDEFINED and CONSTANT.
+     *       The constant value used with CONSTANT border mode is 0.
+     *
+     * @param[in, out] input       Source tensor. Data type supported: U8/F32. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]     output      Destination tensor for the non-maxima suppression 3x3. Data type supported: same as @p input.
+     * @param[in]      border_mode Border mode to use for non-maxima suppression. The implementation supports just 2 border modes: UNDEFINED and CONSTANT.
+     *
+     */
+    void configure(ITensor *input, ITensor *output, BorderMode border_mode);
+};
+}
+#endif /* __ARM_COMPUTE_NENONMAXIMASUPPRESSION3X3_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NENormalizationLayer.h b/arm_compute/runtime/NEON/functions/NENormalizationLayer.h
new file mode 100644
index 0000000000..3202867c43
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NENormalizationLayer.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NENORMALIZATIONLAYER_H__ +#define __ARM_COMPUTE_NENORMALIZATIONLAYER_H__ + +#include "arm_compute/runtime/IFunction.h" + +#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h" +#include "arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h" +#include "arm_compute/runtime/Tensor.h" + +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to simulate a normalization layer. This function calls the following NEON kernels: + * + * -# @ref NEPixelWiseMultiplicationKernel + * -# @ref NEFillBorderKernel + * -# @ref NENormalizationLayerKernel + * + */ +class NENormalizationLayer : public IFunction +{ +public: + /** Default constructor */ + NENormalizationLayer(); + /** Set the input and output tensors. + * + * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], + * and an optional 4th dimension for batch of inputs. Data type supported: QS8/F32 + * @param[out] output Destination with the same dimensions, data type and number of channels of @p input + * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters. + */ + void configure(const ITensor *input, ITensor *output, NormalizationLayerInfo norm_info); + + // Inherited methods overridden: + void run() override; + +private: + NENormalizationLayerKernel _norm_kernel; /**< Normalization layer kernel */ + NEPixelWiseMultiplicationKernel _multiply_kernel; /**< Pixel multiplication kernel */ + NEFillBorderKernel _border_handler; /**< Kernel to handle borders */ + Tensor _input_squared; /**< The intermediate buffer which stores results of squaring input */ +}; +} +#endif /* __ARM_COMPUTE_NENORMALIZATIONLAYER_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEOpticalFlow.h b/arm_compute/runtime/NEON/functions/NEOpticalFlow.h new file mode 100644 index 0000000000..0534551d19 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEOpticalFlow.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEOPTICALFLOW_H__
+#define __ARM_COMPUTE_NEOPTICALFLOW_H__
+
+#include "arm_compute/core/IArray.h"
+#include "arm_compute/core/NEON/kernels/NELKTrackerKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/Array.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/NEON/functions/NEScharr3x3.h"
+#include "arm_compute/runtime/Tensor.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+
+namespace arm_compute
+{
+class Pyramid;
+
+using LKInternalKeypointArray = Array<NELKInternalKeypoint>;
+/** Basic function to execute optical flow. This function calls the following NEON kernels and functions:
+ *
+ * -# @ref NEScharr3x3
+ * -# @ref NELKTrackerKernel
+ *
+ */
+class NEOpticalFlow : public IFunction
+{
+public:
+    /** Constructor */
+    NEOpticalFlow();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEOpticalFlow(const NEOpticalFlow &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEOpticalFlow &operator=(const NEOpticalFlow &) = delete;
+    /** Initialise the function input and output
+     *
+     * @param[in]  old_pyramid          Pointer to the pyramid for the old tensor. Data type supported: U8
+     * @param[in]  new_pyramid          Pointer to the pyramid for the new tensor. Data type supported: U8
+     * @param[in]  old_points           Pointer to the IKeyPointArray storing old key points
+     * @param[in]  new_points_estimates Pointer to the IKeyPointArray storing the estimated new key points
+     * @param[out] new_points           Pointer to the IKeyPointArray storing new key points
+     * @param[in]  termination          The criteria to terminate the search of each keypoint.
+     * @param[in]  epsilon              The error for terminating the algorithm
+     * @param[in]  num_iterations       The maximum number of iterations before terminating the algorithm
+     * @param[in]  window_dimension     The size of the window on which to perform the algorithm
+     * @param[in]  use_initial_estimate The flag to indicate whether the initial estimated position should be used
+     * @param[in]  border_mode          The border mode applied at the Scharr kernel stage
+     * @param[in]  constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT
+     *
+     */
+    void configure(const Pyramid *old_pyramid, const Pyramid *new_pyramid, const IKeyPointArray *old_points, const IKeyPointArray *new_points_estimates,
+                   IKeyPointArray *new_points, Termination termination, float epsilon, unsigned int num_iterations, size_t window_dimension,
+                   bool use_initial_estimate, BorderMode border_mode, uint8_t constant_border_value = 0);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    std::unique_ptr<NEScharr3x3[]>       _func_scharr;
+    std::unique_ptr<NELKTrackerKernel[]> _kernel_tracker;
+    std::unique_ptr<Tensor[]>            _scharr_gx;
+    std::unique_ptr<Tensor[]>            _scharr_gy;
+    IKeyPointArray         *_new_points;
+    const IKeyPointArray   *_new_points_estimates;
+    const IKeyPointArray   *_old_points;
+    LKInternalKeypointArray _new_points_internal;
+    LKInternalKeypointArray _old_points_internal;
+    unsigned int            _num_levels;
+};
+}
+#endif /*__ARM_COMPUTE_NEOPTICALFLOW_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEPhase.h b/arm_compute/runtime/NEON/functions/NEPhase.h
new file mode 100644
index 0000000000..985ba84c4c
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEPhase.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEPHASE_H__
+#define __ARM_COMPUTE_NEPHASE_H__
+
+#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to run @ref NEMagnitudePhaseKernel */
+class NEPhase : public INESimpleFunction
+{
+public:
+    /** Initialise the kernel's inputs and output.
+     *
+     * @param[in]  input1 First tensor input. Data type supported: S16.
+     * @param[in]  input2 Second tensor input. Data type supported: S16.
+     * @param[out] output Output tensor. Data type supported: U8.
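+     *
+     * A minimal usage sketch (illustrative only; assumes gx and gy already contain S16 gradients
+     * and that all tensors have been allocated via Tensor::allocator()):
+     * @code
+     * Tensor gx, gy, phase; // gx/gy: S16 inputs, phase: U8 output
+     * NEPhase phase_func;
+     * phase_func.configure(&gx, &gy, &phase);
+     * phase_func.run(); // run() is inherited from INESimpleFunction
+     * @endcode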
+     */
+    void configure(const ITensor *input1, const ITensor *input2, ITensor *output);
+};
+}
+#endif /*__ARM_COMPUTE_NEPHASE_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h b/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h
new file mode 100644
index 0000000000..de7a797cd8
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEPIXELWISEMULTIPLICATION_H__
+#define __ARM_COMPUTE_NEPIXELWISEMULTIPLICATION_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to run @ref NEPixelWiseMultiplicationKernel */
+class NEPixelWiseMultiplication : public INESimpleFunction
+{
+public:
+    /** Initialise the kernel's inputs, output and conversion policy.
+     *
+     * @param[in]  input1          First tensor input. Data types supported: U8/QS8/S16/F32.
+     * @param[in]  input2          Second tensor input. Data types supported: U8/QS8/S16/F32.
+     * @param[out] output          Output tensor. Data types supported: U8/QS8/S16/F32.
+     * @param[in]  scale           Scale to apply after multiplication. Must be positive.
+     * @param[in]  overflow_policy Overflow policy.
+     * @param[in]  rounding_policy Rounding policy.
+     */
+    void configure(const ITensor *input1, const ITensor *input2, ITensor *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy);
+};
+}
+#endif /*__ARM_COMPUTE_NEPIXELWISEMULTIPLICATION_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEPoolingLayer.h b/arm_compute/runtime/NEON/functions/NEPoolingLayer.h
new file mode 100644
index 0000000000..5a9cffa5ae
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEPoolingLayer.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEPOOLINGLAYER_H__ +#define __ARM_COMPUTE_NEPOOLINGLAYER_H__ + +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to simulate a pooling layer with the specified pooling operation. This function calls the following NEON kernels: + * + * -# @ref NEFillBorderKernel (executed if padding size is different from zero) + * -# @ref NEPoolingLayerKernel + */ +class NEPoolingLayer : public INESimpleFunction +{ +public: + /** Set the input and output tensors. + * + * @param[in, out] input Source tensor. (Written to only when padding != 0) Data types supported: QS8/F32. + * @param[out] output Destination tensor. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. + */ + void configure(ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info); +}; +} +#endif /* __ARM_COMPUTE_NEPOOLINGLAYER_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NERemap.h b/arm_compute/runtime/NEON/functions/NERemap.h new file mode 100644 index 0000000000..b1ec559817 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NERemap.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEREMAP_H__
+#define __ARM_COMPUTE_NEREMAP_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/Tensor.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to execute remap. This function calls the following NEON kernels:
+ *
+ * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
+ * -# @ref NERemapKernel
+ */
+class NERemap : public INESimpleFunction
+{
+public:
+    /** Initialise the function's sources, destination, interpolation policy and border mode.
+     *
+     * @param[in, out] input                 Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[in]      map_x                 Map for X coordinates. Data type supported: F32.
+     * @param[in]      map_y                 Map for Y coordinates. Data type supported: F32.
+     * @param[out]     output                Output tensor. Data type supported: U8.
+     * @param[in]      policy                Interpolation policy to use. Only NEAREST and BILINEAR are supported.
+     * @param[in]      border_mode           Border mode to use on the input tensor.
+     * @param[in]      constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     *
+     */
+    void configure(ITensor *input, const ITensor *map_x, const ITensor *map_y, ITensor *output,
+                   InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0);
+};
+}
+#endif /*__ARM_COMPUTE_NEREMAP_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEScale.h b/arm_compute/runtime/NEON/functions/NEScale.h
new file mode 100644
index 0000000000..e1da891dcf
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEScale.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NESCALEIMAGE_H__
+#define __ARM_COMPUTE_NESCALEIMAGE_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/Tensor.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to run @ref NEScaleKernel */
+class NEScale : public INESimpleFunction
+{
+public:
+    /** Constructor
+     *
+     * Initialize NEScale
+     */
+    NEScale();
+    /** Initialize the function's source, destination, interpolation type and border mode.
+     *
+     * @param[in, out] input                 Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]     output                Destination tensor. Data type supported: U8. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
+     * @param[in]      policy                The interpolation type.
+     * @param[in]      border_mode           Strategy to use for borders.
+     * @param[in]      constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(ITensor *input, ITensor *output, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0);
+
+private:
+    Tensor _offsets; /**< Offset to access the element with NEAREST interpolation or the top-left element with BILINEAR interpolation in the input tensor */
+    Tensor _dx;      /**< Element's distance between the real X coordinate and the nearest smaller integer X (used for BILINEAR interpolation) */
+    Tensor _dy;      /**< Element's distance between the real Y coordinate and the nearest smaller integer Y (used for BILINEAR interpolation) */
+};
+}
+#endif /*__ARM_COMPUTE_NESCALEIMAGE_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEScharr3x3.h b/arm_compute/runtime/NEON/functions/NEScharr3x3.h
new file mode 100644
index 0000000000..db24723902
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEScharr3x3.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NESCHARR3x3_H__
+#define __ARM_COMPUTE_NESCHARR3x3_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to execute Scharr 3x3 filter. This function calls the following NEON kernels:
+ *
+ * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
+ * -# @ref NEScharr3x3Kernel
+ *
+ */
+class NEScharr3x3 : public INESimpleFunction
+{
+public:
+    /** Initialise the function's source, destinations and border mode.
+     *
+     * @note At least one of output_x or output_y must be non-NULL.
+     *
+     * @param[in, out] input                 Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]     output_x              (optional) Destination for the Scharr 3x3 convolution along the X axis. Data type supported: S16.
+     * @param[out]     output_y              (optional) Destination for the Scharr 3x3 convolution along the Y axis. Data type supported: S16.
+     * @param[in]      border_mode           Border mode to use for the convolution.
+     * @param[in]      constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     *
+     */
+    void configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
+};
+}
+#endif /*__ARM_COMPUTE_NESCHARR3x3_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NESobel3x3.h b/arm_compute/runtime/NEON/functions/NESobel3x3.h
new file mode 100644
index 0000000000..e2896ba058
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NESobel3x3.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NESOBEL3x3_H__
+#define __ARM_COMPUTE_NESOBEL3x3_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to execute Sobel 3x3 filter. This function calls the following NEON kernels:
+ *
+ * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
+ * -# @ref NESobel3x3Kernel
+ *
+ */
+class NESobel3x3 : public INESimpleFunction
+{
+public:
+    /** Initialise the function's source, destinations and border mode.
+     *
+     * @note At least one of output_x or output_y must be non-NULL.
+     *
+     * @param[in, out] input                 Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]     output_x              (optional) Destination for the Sobel 3x3 convolution along the X axis. Data type supported: S16.
+     * @param[out]     output_y              (optional) Destination for the Sobel 3x3 convolution along the Y axis. Data type supported: S16.
+     * @param[in]      border_mode           Border mode to use for the convolution.
+     * @param[in]      constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     *
+     */
+    void configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
+};
+}
+#endif /*__ARM_COMPUTE_NESOBEL3x3_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NESobel5x5.h b/arm_compute/runtime/NEON/functions/NESobel5x5.h
new file mode 100644
index 0000000000..fc4d665a70
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NESobel5x5.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NESOBEL5x5_H__
+#define __ARM_COMPUTE_NESOBEL5x5_H__
+
+#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
+#include "arm_compute/core/NEON/kernels/NESobel5x5Kernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/Tensor.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to execute Sobel 5x5 filter. This function calls the following NEON kernels:
+ *
+ * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
+ * -# @ref NESobel5x5HorKernel
+ * -# @ref NESobel5x5VertKernel
+ *
+ */
+class NESobel5x5 : public IFunction
+{
+public:
+    /** Default constructor */
+    NESobel5x5();
+    /** Initialise the function's source, destinations and border mode.
+     *
+     * @note At least one of output_x or output_y must be non-NULL.
+     *
+     * @param[in, out] input                 Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]     output_x              (optional) Destination for the Sobel 5x5 convolution along the X axis. Data type supported: S16.
+     * @param[out]     output_y              (optional) Destination for the Sobel 5x5 convolution along the Y axis. Data type supported: S16.
+     * @param[in]      border_mode           Border mode to use for the convolution.
+     * @param[in]      constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
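+     *
+     * A minimal usage sketch (illustrative only; src is assumed to be an allocated U8 tensor):
+     * @code
+     * Tensor src, sobel_x, sobel_y; // src: U8, sobel_x/sobel_y: S16
+     * NESobel5x5 sobel;
+     * sobel.configure(&src, &sobel_x, &sobel_y, BorderMode::UNDEFINED);
+     * sobel.run(); // horizontal pass, then vertical pass (borders filled only for CONSTANT/REPLICATE)
+     * @endcode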
+     *
+     */
+    void configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
+
+    // Inherited methods overridden:
+    void run() override;
+
+protected:
+    NESobel5x5HorKernel  _sobel_hor;      /**< Sobel Horizontal 5x5 kernel */
+    NESobel5x5VertKernel _sobel_vert;     /**< Sobel Vertical 5x5 kernel */
+    Tensor               _tmp_x;          /**< Temporary buffer for Sobel X */
+    Tensor               _tmp_y;          /**< Temporary buffer for Sobel Y */
+    NEFillBorderKernel   _border_handler; /**< Kernel to handle tensor borders */
+};
+}
+#endif /*__ARM_COMPUTE_NESOBEL5x5_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NESobel7x7.h b/arm_compute/runtime/NEON/functions/NESobel7x7.h
new file mode 100644
index 0000000000..06b7c80ad6
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NESobel7x7.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NESOBEL7x7_H__
+#define __ARM_COMPUTE_NESOBEL7x7_H__
+
+#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
+#include "arm_compute/core/NEON/kernels/NESobel7x7Kernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/Tensor.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to execute Sobel 7x7 filter. This function calls the following NEON kernels:
+ *
+ * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
+ * -# @ref NESobel7x7HorKernel
+ * -# @ref NESobel7x7VertKernel
+ *
+ */
+class NESobel7x7 : public IFunction
+{
+public:
+    /** Default constructor */
+    NESobel7x7();
+    /** Initialise the function's source, destinations and border mode.
+     *
+     * @note At least one of output_x or output_y must be non-NULL.
+     *
+     * @param[in, out] input                 Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]     output_x              (optional) Destination for the Sobel 7x7 convolution along the X axis. Data type supported: S32.
+     * @param[out]     output_y              (optional) Destination for the Sobel 7x7 convolution along the Y axis. Data type supported: S32.
+     * @param[in]      border_mode           Border mode to use for the convolution.
+     * @param[in]      constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
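+     *
+     * A sketch of the single-axis case (illustrative only; per the note above, the unused output
+     * may be nullptr; src is assumed to be an allocated U8 tensor):
+     * @code
+     * Tensor src, sobel_x; // sobel_x: S32 for the 7x7 filter
+     * NESobel7x7 sobel;
+     * sobel.configure(&src, &sobel_x, nullptr, BorderMode::REPLICATE);
+     * sobel.run();
+     * @endcode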
+ * + */ + void configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0); + + // Inherited methods overridden: + void run() override; + +protected: + NESobel7x7HorKernel _sobel_hor; /**< Sobel Horizontal 7x7 kernel */ + NESobel7x7VertKernel _sobel_vert; /**< Sobel Vertical 7x7 kernel */ + Tensor _tmp_x; /**< Temporary buffer for Sobel X */ + Tensor _tmp_y; /**< Temporary buffer for Sobel Y */ + NEFillBorderKernel _border_handler; /**< Kernel to handle tensor borders */ +}; +} +#endif /*__ARM_COMPUTE_NESOBEL7x7_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h new file mode 100644 index 0000000000..dc84dec0e4 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NESOFTMAXLAYER_H__ +#define __ARM_COMPUTE_NESOFTMAXLAYER_H__ + +#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/Tensor.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to compute a SoftmaxLayer. + * + * Softmax is calculated by : + * @f[ out = \frac{e^{x - max(x)}}{\sum{e^{x - max(x)}}} @f] + * + * This function runs the following kernels: + * -# @ref NELogits1DMaxKernel + * -# @ref NELogits1DShiftExpSumKernel + * -# @ref NELogits1DNormKernel + */ +class NESoftmaxLayer : public IFunction +{ +public: + /** Constructor */ + NESoftmaxLayer(); + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: QS8/F32. + * @param[out] output Destination tensor. Data types supported: same as @p input. 
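+     *
+     * A minimal usage sketch (illustrative only; in and out are assumed to be allocated F32
+     * tensors of matching shape):
+     * @code
+     * Tensor in, out;
+     * NESoftmaxLayer softmax;
+     * softmax.configure(&in, &out);
+     * softmax.run(); // max, shift/exp/sum and normalisation kernels run in sequence
+     * @endcode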
+     */
+    void configure(ITensor *input, ITensor *output);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    NELogits1DMaxKernel         _max_kernel;           /**< Kernel to compute the per-row maximum */
+    NELogits1DShiftExpSumKernel _shift_exp_sum_kernel; /**< Kernel to compute the shifted exponentials and their sum */
+    NELogits1DNormKernel        _norm_kernel;          /**< Kernel to normalise by the sum of exponentials */
+    NEFillBorderKernel          _fill_border_kernel;   /**< Kernel to handle tensor borders */
+    Tensor                      _max;                  /**< Intermediate buffer holding the row maxima */
+    Tensor                      _sum;                  /**< Intermediate buffer holding the sums of exponentials */
+    Tensor                      _tmp;                  /**< Intermediate buffer holding the shifted exponentials */
+};
+}
+#endif /* __ARM_COMPUTE_NESOFTMAXLAYER_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NETableLookup.h b/arm_compute/runtime/NEON/functions/NETableLookup.h
new file mode 100644
index 0000000000..b59ffb877c
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NETableLookup.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NETABLELOOKUP_H__
+#define __ARM_COMPUTE_NETABLELOOKUP_H__
+
+#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+
+namespace arm_compute
+{
+class ITensor;
+class ILut;
+
+/** Basic function to run @ref NETableLookupKernel */
+class NETableLookup : public INESimpleFunction
+{
+public:
+    /** Initialise the kernel's inputs and output
+     *
+     * @param[in]  input  First tensor input. Data types supported: U8/S16
+     * @param[in]  lut    Input lookup table.
+     * @param[out] output Output tensor. Data types supported: same as @p input
+     */
+    void configure(const ITensor *input, const ILut *lut, ITensor *output);
+};
+}
+#endif /*__ARM_COMPUTE_NETABLELOOKUP_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEThreshold.h b/arm_compute/runtime/NEON/functions/NEThreshold.h
new file mode 100644
index 0000000000..d407ee5b15
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEThreshold.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NETHRESHOLD_H__
+#define __ARM_COMPUTE_NETHRESHOLD_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to run @ref NEThresholdKernel */
+class NEThreshold : public INESimpleFunction
+{
+public:
+    /** Initialise the function's source, destination, thresholds and threshold type
+     *
+     * @param[in]  input       First tensor input. Data type supported: U8.
+     * @param[out] output      Output tensor. Data type supported: U8.
+     * @param[in]  threshold   Threshold. If upper threshold is specified, this will be used as the lower threshold.
+     * @param[in]  false_value Value to assign when the condition is false.
+     * @param[in]  true_value  Value to assign when the condition is true.
+     * @param[in]  type        Thresholding type. Can either be BINARY or RANGE.
+     * @param[in]  upper       Upper threshold. Only used with RANGE thresholding.
+     */
+    void configure(const ITensor *input, ITensor *output, uint8_t threshold, uint8_t false_value = 0, uint8_t true_value = 0,
+                   ThresholdType type = ThresholdType::BINARY, uint8_t upper = 0);
+};
+}
+#endif /*__ARM_COMPUTE_NETHRESHOLD_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NETranspose.h b/arm_compute/runtime/NEON/functions/NETranspose.h
new file mode 100644
index 0000000000..4b606e7282
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NETranspose.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NETRANSPOSE_H__
+#define __ARM_COMPUTE_NETRANSPOSE_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to transpose a matrix on NEON. This function calls the following NEON kernel:
+ *
+ * -# @ref NETransposeKernel
+ *
+ */
+class NETranspose : public INESimpleFunction
+{
+public:
+    /** Initialise the kernel's inputs and output
+     *
+     * @param[in]  input  Input tensor. Data types supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32
+     * @param[out] output Output tensor. Data type supported: Same as @p input
+     */
+    void configure(const ITensor *input, ITensor *output);
+};
+}
+
+#endif /* __ARM_COMPUTE_NETRANSPOSE_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEWarpAffine.h b/arm_compute/runtime/NEON/functions/NEWarpAffine.h
new file mode 100644
index 0000000000..f8eebe8d2a
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEWarpAffine.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEWARPAFFINE_H__
+#define __ARM_COMPUTE_NEWARPAFFINE_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to run @ref NEWarpAffineKernel */
+class NEWarpAffine : public INESimpleFunction
+{
+public:
+    /** Initialize the function's source, destination, interpolation policy and border mode.
+     *
+     * @param[in, out] input                 Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]     output                Destination tensor. Data type supported: U8.
+     * @param[in]      matrix                The affine matrix. Must be 2x3 of type float.
+     * @param[in]      policy                The interpolation type.
+     * @param[in]      border_mode           Strategy to use for borders.
+     * @param[in]      constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(ITensor *input, ITensor *output, const float *matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0);
+};
+}
+#endif /*__ARM_COMPUTE_NEWARPAFFINE_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEWarpPerspective.h b/arm_compute/runtime/NEON/functions/NEWarpPerspective.h
new file mode 100644
index 0000000000..d0699291b1
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEWarpPerspective.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEWARPPERSPECTIVE_H__
+#define __ARM_COMPUTE_NEWARPPERSPECTIVE_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to run @ref NEWarpPerspectiveKernel */
+class NEWarpPerspective : public INESimpleFunction
+{
+public:
+    /** Initialize the function's source, destination, interpolation policy and border mode.
+     *
+     * @param[in, out] input                 Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]     output                Destination tensor. Data type supported: U8.
+     * @param[in]      matrix                The perspective matrix. Must be 3x3 of type float.
+     * @param[in]      policy                The interpolation type.
+     * @param[in]      border_mode           Strategy to use for borders.
+     * @param[in]      constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(ITensor *input, ITensor *output, const float *matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0);
+};
+}
+#endif /*__ARM_COMPUTE_NEWARPPERSPECTIVE_H__ */
diff --git a/arm_compute/runtime/OMP/OMPScheduler.h b/arm_compute/runtime/OMP/OMPScheduler.h
new file mode 100644
index 0000000000..21df6a699d
--- /dev/null
+++ b/arm_compute/runtime/OMP/OMPScheduler.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_OMPSCHEDULER_H__ +#define __ARM_COMPUTE_OMPSCHEDULER_H__ + +#include "arm_compute/runtime/IScheduler.h" + +namespace arm_compute +{ +/** Pool of threads to automatically split a kernel's execution among several threads. */ +class OMPScheduler : public IScheduler +{ +public: + /** Sets the number of threads the scheduler will use to run the kernels. + * + * @param[in] num_threads If set to 0, then the number returned by omp_get_max_threads() will be used, otherwise the number of threads specified. + */ + void set_num_threads(unsigned int num_threads) override; + /** Returns the number of threads that the OMPScheduler has in its pool. + * + * @return Number of threads available in OMPScheduler. + */ + unsigned int num_threads() const override; + /** Access the scheduler singleton + * + * @return The scheduler + */ + static OMPScheduler &get(); + /** Multithread the execution of the passed kernel if possible. + * + * The kernel will run on a single thread if any of these conditions is true: + * - ICPPKernel::is_parallelisable() returns false + * - The scheduler has been initialized with only one thread. + * + * @param[in] kernel Kernel to execute. + * @param[in] split_dimension Dimension along which to split the kernel's execution window. + */ + void schedule(ICPPKernel *kernel, unsigned int split_dimension) override; + +private: + /** Constructor. */ + OMPScheduler(); + + unsigned int _num_threads; +}; +} +#endif /* __ARM_COMPUTE_OMPSCHEDULER_H__ */ diff --git a/arm_compute/runtime/Pyramid.h b/arm_compute/runtime/Pyramid.h new file mode 100644 index 0000000000..2e7613759f --- /dev/null +++ b/arm_compute/runtime/Pyramid.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
diff --git a/arm_compute/runtime/Pyramid.h b/arm_compute/runtime/Pyramid.h
new file mode 100644
index 0000000000..2e7613759f
--- /dev/null
+++ b/arm_compute/runtime/Pyramid.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_PYRAMID_H__
+#define __ARM_COMPUTE_PYRAMID_H__
+
+#include "arm_compute/core/IPyramid.h"
+#include "arm_compute/core/PyramidInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/Tensor.h"
+
+#include <cstddef>
+#include <memory>
+
+namespace arm_compute
+{
+class Tensor;
+
+/** Basic implementation of the pyramid interface */
+class Pyramid : public IPyramid
+{
+public:
+    /** Initialize pyramid data-object using the given Pyramid's metadata
+     *
+     * @param[in] info Pyramid's metadata
+     */
+    void init(const PyramidInfo &info);
+
+    /** Initialize pyramid data-object using the given Pyramid's metadata
+     *
+     * @note Uses conservative padding strategy which fits all kernels.
+     *
+     * @param[in] info Pyramid's metadata
+     */
+    void init_auto_padding(const PyramidInfo &info);
+
+    /** Allocate the planes in the pyramid */
+    void allocate();
+
+    // Inherited methods overridden:
+    const PyramidInfo *info() const override;
+    Tensor *get_pyramid_level(size_t index) const override;
+
+private:
+    /** Initialize pyramid data-object using the given Pyramid's metadata
+     *
+     * @param[in] info         Pyramid's metadata
+     * @param[in] auto_padding Specifies whether the images in the pyramid use auto padding
+     */
+    void internal_init(const PyramidInfo &info, bool auto_padding);
+
+    PyramidInfo               _info{};
+    std::unique_ptr<Tensor[]> _pyramid{ nullptr };
+};
+}
+#endif /*__ARM_COMPUTE_PYRAMID_H__ */
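[Editor's note: a hedged sketch of the init/allocate flow above, not part of the patch. It assumes PyramidInfo exposes a (num_levels, scale, shape, format) constructor and that SCALE_PYRAMID_HALF is available from core/Types.h; the 640x480 base size is illustrative.]

    #include "arm_compute/core/PyramidInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/Pyramid.h"

    using namespace arm_compute;

    void build_pyramid()
    {
        // Four half-scale levels on top of a 640x480 U8 base image.
        PyramidInfo info(4, SCALE_PYRAMID_HALF, TensorShape(640U, 480U), Format::U8);

        Pyramid pyramid;
        pyramid.init(info); // init_auto_padding(info) would instead pad each level to fit any kernel
        pyramid.allocate(); // back every level with CPU memory

        Tensor *base = pyramid.get_pyramid_level(0);
        (void)base;
    }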
diff --git a/arm_compute/runtime/Scheduler.h b/arm_compute/runtime/Scheduler.h
new file mode 100644
index 0000000000..21f944b75f
--- /dev/null
+++ b/arm_compute/runtime/Scheduler.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_SCHEDULER_H__
+#define __ARM_COMPUTE_SCHEDULER_H__
+
+#include "arm_compute/runtime/IScheduler.h"
+#include <memory>
+
+namespace arm_compute
+{
+/** Configurable scheduler which supports multiple multithreading APIs and allows choosing between different schedulers at runtime. */
+class Scheduler
+{
+public:
+    enum class Type
+    {
+        ST,    // Single thread.
+        CPP,   // C++11 threads.
+        OMP,   // OpenMP.
+        CUSTOM // Provided by the user.
+    };
+    /** Sets the user defined scheduler and makes it the active scheduler.
+     *
+     * @param[in] scheduler A shared pointer to a custom scheduler implemented by the user.
+     */
+    static void set(std::shared_ptr<IScheduler> &scheduler);
+    /** Access the scheduler singleton.
+     *
+     * @return A reference to the scheduler object.
+     */
+    static IScheduler &get();
+    /** Set the active scheduler.
+     *
+     * Only one scheduler can be enabled at any time.
+     *
+     * @param[in] t The type of the scheduler to be enabled.
+     */
+    static void set(Type t);
+    /** Returns the type of the active scheduler.
+     *
+     * @return The current scheduler's type.
+     */
+    static Type get_type();
+    /** Returns whether the given scheduler type is supported.
+     *
+     * @param[in] t Type of the scheduler to check.
+     *
+     * @return True if the given scheduler type is supported, false otherwise.
+     */
+    static bool is_available(Type t);
+
+private:
+    static Type _scheduler_type;
+    static std::shared_ptr<IScheduler> _custom_scheduler;
+    Scheduler();
+};
+}
+#endif /* __ARM_COMPUTE_SCHEDULER_H__ */
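[Editor's note: a minimal sketch of runtime scheduler selection using only the API declared above; editorial, not part of the patch. The thread count of 2 is arbitrary.]

    #include "arm_compute/runtime/Scheduler.h"

    using namespace arm_compute;

    void pick_scheduler()
    {
        // Prefer OpenMP when this build supports it, otherwise fall back
        // to the C++11 thread implementation.
        if(Scheduler::is_available(Scheduler::Type::OMP))
        {
            Scheduler::set(Scheduler::Type::OMP);
        }
        else
        {
            Scheduler::set(Scheduler::Type::CPP);
        }

        // Functions dispatch their kernels through this singleton.
        IScheduler &scheduler = Scheduler::get();
        scheduler.set_num_threads(2);
    }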
diff --git a/arm_compute/runtime/SingleThreadScheduler.h b/arm_compute/runtime/SingleThreadScheduler.h
new file mode 100644
index 0000000000..a6e1defe7c
--- /dev/null
+++ b/arm_compute/runtime/SingleThreadScheduler.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_SINGLETHREADSCHEDULER_H__
+#define __ARM_COMPUTE_SINGLETHREADSCHEDULER_H__
+
+#include "arm_compute/runtime/IScheduler.h"
+
+namespace arm_compute
+{
+/** Scheduler that runs all kernels sequentially on the caller's thread. */
+class SingleThreadScheduler : public IScheduler
+{
+public:
+    /** Sets the number of threads the scheduler will use to run the kernels.
+     *
+     * @param[in] num_threads This is ignored for this scheduler as the number of threads is always one.
+     */
+    void set_num_threads(unsigned int num_threads) override;
+    /** Returns the number of threads that the SingleThreadScheduler has, which is always 1.
+     *
+     * @return Number of threads available in SingleThreadScheduler.
+     */
+    unsigned int num_threads() const override;
+    /** Access the scheduler singleton
+     *
+     * @return The scheduler
+     */
+    static SingleThreadScheduler &get();
+    /** Runs the kernel in the same thread as the caller synchronously.
+     *
+     * @param[in] kernel          Kernel to execute.
+     * @param[in] split_dimension Dimension along which to split the kernel's execution window.
+     */
+    void schedule(ICPPKernel *kernel, unsigned int split_dimension) override;
+
+private:
+    /** Constructor. */
+    SingleThreadScheduler() = default;
+};
+}
+#endif /* __ARM_COMPUTE_SINGLETHREADSCHEDULER_H__ */
diff --git a/arm_compute/runtime/SubTensor.h b/arm_compute/runtime/SubTensor.h
new file mode 100644
index 0000000000..bdb229de49
--- /dev/null
+++ b/arm_compute/runtime/SubTensor.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_SUBTENSOR_H__
+#define __ARM_COMPUTE_SUBTENSOR_H__
+
+#include "arm_compute/core/SubTensorInfo.h"
+#include "arm_compute/runtime/Tensor.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ITensorInfo;
+
+/** Basic implementation of the sub-tensor interface */
+class SubTensor : public ITensor
+{
+public:
+    /** Constructor
+     *
+     * @param[in] parent       Parent tensor
+     * @param[in] tensor_shape Shape of the subtensor
+     * @param[in] coords       Coordinates of the first subtensor element inside the parent tensor.
+     */
+    SubTensor(ITensor *parent, const TensorShape &tensor_shape, const Coordinates &coords);
+    /** Destructor */
+    ~SubTensor() = default;
+    /** Prevent instances of this class from being copy constructed */
+    SubTensor(const SubTensor &) = delete;
+    /** Prevent instances of this class from being copied */
+    SubTensor &operator=(const SubTensor &) = delete;
+    /** Allow instances of this class to be move constructed */
+    SubTensor(SubTensor &&) = default;
+    /** Allow instances of this class to be moved */
+    SubTensor &operator=(SubTensor &&) = default;
+    /** Return the parent tensor of the subtensor
+     *
+     * @return Parent tensor
+     */
+    ITensor *parent();
+
+    // Inherited methods overridden:
+    ITensorInfo *info() const override;
+    ITensorInfo *info() override;
+    uint8_t *buffer() const override;
+
+private:
+    ITensor              *_parent;
+    mutable SubTensorInfo _info;
+};
+}
+#endif /*__ARM_COMPUTE_SUBTENSOR_H__ */
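[Editor's note: a hedged sketch of creating a sub-tensor view with the constructor above, not part of the patch. It assumes `parent` is an already-initialised and allocated 2D tensor; the window size and offset are illustrative.]

    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/SubTensor.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void make_view(Tensor &parent)
    {
        // 32x32 window whose first element sits at (16, 16) in the parent;
        // the view aliases the parent's buffer instead of owning memory.
        SubTensor view(&parent, TensorShape(32U, 32U), Coordinates(16, 16));

        ITensorInfo *window_info = view.info(); // Describes the window, not the parent.
        (void)window_info;
    }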
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TENSOR_H__
+#define __ARM_COMPUTE_TENSOR_H__
+
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ITensorInfo;
+
+/** Basic implementation of the tensor interface */
+class Tensor : public ITensor
+{
+public:
+    /** Constructor */
+    Tensor();
+    /** Destructor: free the tensor's memory */
+    ~Tensor() = default;
+    /** Allow instances of this class to be move constructed */
+    Tensor(Tensor &&) = default;
+    /** Allow instances of this class to be moved */
+    Tensor &operator=(Tensor &&) = default;
+    /** Return a pointer to the tensor's allocator
+     *
+     * @return A pointer to the tensor's allocator
+     */
+    TensorAllocator *allocator();
+
+    // Inherited methods overridden:
+    ITensorInfo *info() const override;
+    ITensorInfo *info() override;
+    uint8_t *buffer() const override;
+
+private:
+    mutable TensorAllocator _allocator; /**< Instance of the basic CPU allocator.*/
+};
+
+using Image = Tensor;
+}
+#endif /*__ARM_COMPUTE_TENSOR_H__ */
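[Editor's note: a minimal sketch of the usual init/allocate lifecycle for the Tensor class above, not part of the patch. It assumes TensorInfo exposes a (width, height, format) constructor; the 640x480 size is illustrative.]

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void create_image()
    {
        Tensor image;

        // Describe a 640x480 single-plane U8 image, then reserve its backing
        // CPU buffer through the tensor's allocator.
        image.allocator()->init(TensorInfo(640U, 480U, Format::U8));
        image.allocator()->allocate();

        uint8_t *data = image.buffer();
        (void)data;
    }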
diff --git a/arm_compute/runtime/TensorAllocator.h b/arm_compute/runtime/TensorAllocator.h
new file mode 100644
index 0000000000..450323b3ab
--- /dev/null
+++ b/arm_compute/runtime/TensorAllocator.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TENSORALLOCATOR_H__
+#define __ARM_COMPUTE_TENSORALLOCATOR_H__
+
+#include "arm_compute/runtime/ITensorAllocator.h"
+
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+namespace arm_compute
+{
+class Coordinates;
+class TensorInfo;
+
+/** Basic implementation of a CPU memory tensor allocator. */
+class TensorAllocator : public ITensorAllocator
+{
+public:
+    /** Default constructor. */
+    TensorAllocator();
+
+    /** Make ITensorAllocator's init methods available */
+    using ITensorAllocator::init;
+
+    /** Shares the same backing memory with another tensor allocator, while the tensor info might be different.
+     * In other words this can be used to create a sub-tensor from another tensor while sharing the same memory.
+     *
+     * @note The TensorAllocators have to be of the same specialized type.
+     *
+     * @param[in] allocator The allocator that owns the backing memory to be shared. Ownership becomes shared afterwards.
+     * @param[in] coords    The starting coordinates of the new tensor inside the parent tensor.
+     * @param[in] sub_info  The new tensor information (e.g. shape, etc.)
+     */
+    void init(const TensorAllocator &allocator, const Coordinates &coords, TensorInfo sub_info);
+
+    /** Returns the pointer to the allocated data. */
+    uint8_t *data() const;
+
+    /** Allocate CPU memory of the size specified by the TensorInfo.
+     *
+     * @note The tensor must not already be allocated when calling this function.
+     */
+    void allocate() override;
+
+    /** Free allocated CPU memory.
+     *
+     * @note The tensor must have been allocated when calling this function.
+     */
+    void free() override;
+
+protected:
+    /** No-op for CPU memory
+     *
+     * @return A pointer to the beginning of the tensor's allocation.
+     */
+    uint8_t *lock() override;
+
+    /** No-op for CPU memory. */
+    void unlock() override;
+
+private:
+    std::shared_ptr<std::vector<uint8_t>> _buffer; /**< CPU memory allocation. */
+};
+}
+#endif /* __ARM_COMPUTE_TENSORALLOCATOR_H__ */
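[Editor's note: a hedged sketch of the memory-sharing init overload above, not part of the patch. It assumes TensorInfo exposes a (shape, format) constructor and that the parent has been allocated first; sizes and coordinates are illustrative.]

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void share_backing_memory()
    {
        // The parent owns the actual buffer...
        Tensor parent;
        parent.allocator()->init(TensorInfo(TensorShape(64U, 64U), Format::U8));
        parent.allocator()->allocate();

        // ...while the child aliases a 32x32 region starting at (0, 16),
        // so no additional CPU memory is allocated for it.
        Tensor child;
        child.allocator()->init(*parent.allocator(), Coordinates(0, 16), TensorInfo(TensorShape(32U, 32U), Format::U8));
    }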
diff --git a/arm_compute/runtime/Utils.h b/arm_compute/runtime/Utils.h
new file mode 100644
index 0000000000..2f037a0621
--- /dev/null
+++ b/arm_compute/runtime/Utils.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_RUNTIME_UTILS_H__
+#define __ARM_COMPUTE_RUNTIME_UTILS_H__
+
+#include "arm_compute/runtime/Scheduler.h"
+
+#include <string>
+
+namespace arm_compute
+{
+/** Convert a Scheduler::Type into a string.
+ *
+ * @param[in] t @ref Scheduler::Type to be translated to string.
+ *
+ * @return The string describing the scheduler type.
+ */
+const std::string &string_from_scheduler_type(Scheduler::Type t);
+}
+#endif /* __ARM_COMPUTE_RUNTIME_UTILS_H__ */
--
cgit v1.2.1