From 6ff3b19ee6120edf015fad8caab2991faa3070af Mon Sep 17 00:00:00 2001 From: Anthony Barbier Date: Mon, 4 Sep 2017 18:44:23 +0100 Subject: COMPMID-344 Updated doxygen Change-Id: I32f7b84daa560e460b77216add529c8fa8b327ae --- .clang-format | 48 + .clang-tidy | 410 + .gitmodules | 8 + 3rdparty | 1 + SConscript | 199 + SConstruct | 208 + arm_compute/core/AccessWindowAutoPadding.h | 76 + arm_compute/core/AccessWindowStatic.h | 92 + arm_compute/core/AccessWindowTranspose.h | 48 + arm_compute/core/CL/CLHelpers.h | 105 + arm_compute/core/CL/CLKernelLibrary.h | 248 + arm_compute/core/CL/CLKernels.h | 90 + arm_compute/core/CL/CLTypes.h | 41 + arm_compute/core/CL/ICLArray.h | 118 + arm_compute/core/CL/ICLDistribution1D.h | 102 + arm_compute/core/CL/ICLHOG.h | 113 + arm_compute/core/CL/ICLKernel.h | 157 + arm_compute/core/CL/ICLLut.h | 94 + arm_compute/core/CL/ICLMultiHOG.h | 56 + arm_compute/core/CL/ICLMultiImage.h | 58 + arm_compute/core/CL/ICLSimple2DKernel.h | 41 + arm_compute/core/CL/ICLSimple3DKernel.h | 43 + arm_compute/core/CL/ICLSimpleKernel.h | 66 + arm_compute/core/CL/ICLTensor.h | 106 + arm_compute/core/CL/OpenCL.h | 43 + .../core/CL/kernels/CLAbsoluteDifferenceKernel.h | 71 + arm_compute/core/CL/kernels/CLAccumulateKernel.h | 91 + .../core/CL/kernels/CLActivationLayerKernel.h | 46 + .../core/CL/kernels/CLArithmeticAdditionKernel.h | 72 + .../CL/kernels/CLArithmeticSubtractionKernel.h | 74 + .../CL/kernels/CLBatchNormalizationLayerKernel.h | 77 + arm_compute/core/CL/kernels/CLBitwiseAndKernel.h | 68 + arm_compute/core/CL/kernels/CLBitwiseNotKernel.h | 49 + arm_compute/core/CL/kernels/CLBitwiseOrKernel.h | 68 + arm_compute/core/CL/kernels/CLBitwiseXorKernel.h | 68 + arm_compute/core/CL/kernels/CLBox3x3Kernel.h | 51 + arm_compute/core/CL/kernels/CLCannyEdgeKernel.h | 147 + .../core/CL/kernels/CLChannelCombineKernel.h | 83 + .../core/CL/kernels/CLChannelExtractKernel.h | 79 + arm_compute/core/CL/kernels/CLCol2ImKernel.h | 86 + 
arm_compute/core/CL/kernels/CLColorConvertKernel.h | 90 + arm_compute/core/CL/kernels/CLConvolutionKernel.h | 182 + .../core/CL/kernels/CLDepthConcatenateKernel.h | 76 + arm_compute/core/CL/kernels/CLDepthConvertKernel.h | 61 + arm_compute/core/CL/kernels/CLDerivativeKernel.h | 72 + arm_compute/core/CL/kernels/CLDilateKernel.h | 51 + arm_compute/core/CL/kernels/CLErodeKernel.h | 51 + arm_compute/core/CL/kernels/CLFastCornersKernel.h | 114 + arm_compute/core/CL/kernels/CLFillBorderKernel.h | 77 + .../core/CL/kernels/CLGEMMInterleave4x4Kernel.h | 80 + .../CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h | 81 + .../kernels/CLGEMMMatrixAccumulateBiasesKernel.h | 63 + .../core/CL/kernels/CLGEMMMatrixAdditionKernel.h | 70 + .../core/CL/kernels/CLGEMMMatrixMultiplyKernel.h | 73 + .../core/CL/kernels/CLGEMMTranspose1xWKernel.h | 84 + arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h | 51 + arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h | 67 + .../core/CL/kernels/CLGaussianPyramidKernel.h | 100 + .../core/CL/kernels/CLHOGDescriptorKernel.h | 105 + arm_compute/core/CL/kernels/CLHOGDetectorKernel.h | 82 + .../core/CL/kernels/CLHarrisCornersKernel.h | 85 + arm_compute/core/CL/kernels/CLHistogramKernel.h | 98 + arm_compute/core/CL/kernels/CLIm2ColKernel.h | 111 + .../core/CL/kernels/CLIntegralImageKernel.h | 73 + arm_compute/core/CL/kernels/CLLKTrackerKernel.h | 183 + .../CLLocallyConnectedMatrixMultiplyKernel.h | 68 + .../core/CL/kernels/CLMagnitudePhaseKernel.h | 77 + arm_compute/core/CL/kernels/CLMeanStdDevKernel.h | 74 + arm_compute/core/CL/kernels/CLMedian3x3Kernel.h | 51 + .../core/CL/kernels/CLMinMaxLocationKernel.h | 104 + .../core/CL/kernels/CLNonLinearFilterKernel.h | 63 + .../CL/kernels/CLNonMaximaSuppression3x3Kernel.h | 52 + .../core/CL/kernels/CLNormalizationLayerKernel.h | 71 + .../CL/kernels/CLPixelWiseMultiplicationKernel.h | 73 + arm_compute/core/CL/kernels/CLPoolingLayerKernel.h | 69 + arm_compute/core/CL/kernels/CLRemapKernel.h | 70 + 
arm_compute/core/CL/kernels/CLScaleKernel.h | 55 + arm_compute/core/CL/kernels/CLScharr3x3Kernel.h | 86 + arm_compute/core/CL/kernels/CLSobel3x3Kernel.h | 72 + arm_compute/core/CL/kernels/CLSobel5x5Kernel.h | 116 + arm_compute/core/CL/kernels/CLSobel7x7Kernel.h | 116 + arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h | 109 + arm_compute/core/CL/kernels/CLTableLookupKernel.h | 47 + arm_compute/core/CL/kernels/CLThresholdKernel.h | 56 + arm_compute/core/CL/kernels/CLTransposeKernel.h | 49 + arm_compute/core/CL/kernels/CLWarpAffineKernel.h | 51 + .../core/CL/kernels/CLWarpPerspectiveKernel.h | 51 + .../core/CL/kernels/CLWeightsReshapeKernel.h | 114 + arm_compute/core/CPP/CPPKernels.h | 32 + arm_compute/core/CPP/ICPPKernel.h | 53 + arm_compute/core/CPP/ICPPSimpleKernel.h | 66 + .../core/CPP/kernels/CPPCornerCandidatesKernel.h | 74 + .../CPPDetectionWindowNonMaximaSuppressionKernel.h | 72 + .../CPP/kernels/CPPSortEuclideanDistanceKernel.h | 70 + arm_compute/core/Coordinates.h | 61 + arm_compute/core/Dimensions.h | 178 + arm_compute/core/Error.h | 160 + arm_compute/core/FixedPoint.h | 217 + arm_compute/core/FixedPoint.inl | 252 + arm_compute/core/HOGInfo.h | 146 + arm_compute/core/Helpers.h | 507 + arm_compute/core/Helpers.inl | 306 + arm_compute/core/IAccessWindow.h | 225 + arm_compute/core/IArray.h | 149 + arm_compute/core/IDistribution.h | 59 + arm_compute/core/IDistribution1D.h | 84 + arm_compute/core/IHOG.h | 54 + arm_compute/core/IKernel.h | 72 + arm_compute/core/ILut.h | 69 + arm_compute/core/IMultiHOG.h | 61 + arm_compute/core/IMultiImage.h | 60 + arm_compute/core/IPyramid.h | 56 + arm_compute/core/ITensor.h | 90 + arm_compute/core/ITensorInfo.h | 195 + arm_compute/core/MultiImageInfo.h | 66 + arm_compute/core/NEON/INEKernel.h | 33 + arm_compute/core/NEON/INESimpleKernel.h | 33 + arm_compute/core/NEON/NEColorConvertHelper.inl | 888 ++ arm_compute/core/NEON/NEFixedPoint.h | 686 + arm_compute/core/NEON/NEFixedPoint.inl | 1018 ++ arm_compute/core/NEON/NEKernels.h 
| 96 + arm_compute/core/NEON/NEMath.h | 96 + arm_compute/core/NEON/NEMath.inl | 141 + .../core/NEON/kernels/NEAbsoluteDifferenceKernel.h | 82 + arm_compute/core/NEON/kernels/NEAccumulateKernel.h | 122 + .../core/NEON/kernels/NEActivationLayerKernel.h | 84 + .../core/NEON/kernels/NEArithmeticAdditionKernel.h | 79 + .../NEON/kernels/NEArithmeticSubtractionKernel.h | 79 + .../NEON/kernels/NEBatchNormalizationLayerKernel.h | 78 + arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h | 68 + arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h | 66 + arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h | 68 + arm_compute/core/NEON/kernels/NEBitwiseXorKernel.h | 68 + arm_compute/core/NEON/kernels/NEBox3x3Kernel.h | 62 + arm_compute/core/NEON/kernels/NECannyEdgeKernel.h | 190 + .../core/NEON/kernels/NEChannelCombineKernel.h | 125 + .../core/NEON/kernels/NEChannelExtractKernel.h | 109 + arm_compute/core/NEON/kernels/NECol2ImKernel.h | 100 + .../core/NEON/kernels/NEColorConvertKernel.h | 88 + .../core/NEON/kernels/NEConvolutionKernel.h | 251 + .../NEON/kernels/NECumulativeDistributionKernel.h | 80 + .../core/NEON/kernels/NEDepthConcatenateKernel.h | 76 + .../core/NEON/kernels/NEDepthConvertKernel.h | 68 + arm_compute/core/NEON/kernels/NEDerivativeKernel.h | 94 + arm_compute/core/NEON/kernels/NEDilateKernel.h | 49 + .../NEDirectConvolutionLayerBiasAccumulateKernel.h | 74 + .../NEON/kernels/NEDirectConvolutionLayerKernel.h | 76 + arm_compute/core/NEON/kernels/NEErodeKernel.h | 49 + .../core/NEON/kernels/NEFastCornersKernel.h | 72 + arm_compute/core/NEON/kernels/NEFillArrayKernel.h | 73 + arm_compute/core/NEON/kernels/NEFillBorderKernel.h | 79 + .../core/NEON/kernels/NEFillInnerBorderKernel.h | 75 + .../core/NEON/kernels/NEGEMMInterleave4x4Kernel.h | 79 + .../NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h | 88 + .../kernels/NEGEMMMatrixAccumulateBiasesKernel.h | 63 + .../core/NEON/kernels/NEGEMMMatrixAdditionKernel.h | 81 + .../core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h | 75 + 
.../core/NEON/kernels/NEGEMMTranspose1xWKernel.h | 82 + .../core/NEON/kernels/NEGaussian3x3Kernel.h | 50 + .../core/NEON/kernels/NEGaussian5x5Kernel.h | 73 + .../core/NEON/kernels/NEGaussianPyramidKernel.h | 100 + .../core/NEON/kernels/NEHOGDescriptorKernel.h | 141 + .../core/NEON/kernels/NEHOGDetectorKernel.h | 87 + .../core/NEON/kernels/NEHarrisCornersKernel.h | 126 + arm_compute/core/NEON/kernels/NEHistogramKernel.h | 129 + arm_compute/core/NEON/kernels/NEIm2ColKernel.h | 114 + .../core/NEON/kernels/NEIntegralImageKernel.h | 50 + arm_compute/core/NEON/kernels/NELKTrackerKernel.h | 144 + .../NELocallyConnectedMatrixMultiplyKernel.h | 64 + .../core/NEON/kernels/NEMagnitudePhaseKernel.h | 164 + arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h | 76 + arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h | 50 + .../core/NEON/kernels/NEMinMaxLocationKernel.h | 161 + .../core/NEON/kernels/NENonLinearFilterKernel.h | 147 + .../NEON/kernels/NENonMaximaSuppression3x3Kernel.h | 99 + .../core/NEON/kernels/NENormalizationLayerKernel.h | 106 + .../NEON/kernels/NEPixelWiseMultiplicationKernel.h | 105 + .../core/NEON/kernels/NEPoolingLayerKernel.h | 106 + arm_compute/core/NEON/kernels/NERemapKernel.h | 78 + arm_compute/core/NEON/kernels/NEScaleKernel.h | 89 + arm_compute/core/NEON/kernels/NEScharr3x3Kernel.h | 82 + arm_compute/core/NEON/kernels/NESobel3x3Kernel.h | 82 + arm_compute/core/NEON/kernels/NESobel5x5Kernel.h | 118 + arm_compute/core/NEON/kernels/NESobel7x7Kernel.h | 122 + .../core/NEON/kernels/NESoftmaxLayerKernel.h | 135 + .../core/NEON/kernels/NETableLookupKernel.h | 76 + arm_compute/core/NEON/kernels/NEThresholdKernel.h | 81 + arm_compute/core/NEON/kernels/NETransposeKernel.h | 78 + arm_compute/core/NEON/kernels/NEWarpKernel.h | 117 + .../core/NEON/kernels/NEWeightsReshapeKernel.h | 94 + arm_compute/core/PixelValue.h | 168 + arm_compute/core/PyramidInfo.h | 131 + arm_compute/core/Size2D.h | 84 + arm_compute/core/Steps.h | 66 + arm_compute/core/Strides.h | 62 + 
arm_compute/core/SubTensorInfo.h | 184 + arm_compute/core/TensorInfo.h | 300 + arm_compute/core/TensorShape.h | 141 + arm_compute/core/Types.h | 636 + arm_compute/core/Utils.h | 740 ++ arm_compute/core/Validate.h | 563 + arm_compute/core/Window.h | 355 + arm_compute/core/Window.inl | 182 + arm_compute/runtime/Array.h | 75 + arm_compute/runtime/CL/CLArray.h | 108 + arm_compute/runtime/CL/CLDistribution1D.h | 79 + arm_compute/runtime/CL/CLFunctions.h | 94 + arm_compute/runtime/CL/CLHOG.h | 80 + arm_compute/runtime/CL/CLLut.h | 89 + arm_compute/runtime/CL/CLLutAllocator.h | 88 + arm_compute/runtime/CL/CLMultiHOG.h | 56 + arm_compute/runtime/CL/CLMultiImage.h | 87 + arm_compute/runtime/CL/CLPyramid.h | 82 + arm_compute/runtime/CL/CLScheduler.h | 158 + arm_compute/runtime/CL/CLSubTensor.h | 99 + arm_compute/runtime/CL/CLTensor.h | 81 + arm_compute/runtime/CL/CLTensorAllocator.h | 103 + arm_compute/runtime/CL/ICLSimpleFunction.h | 50 + .../runtime/CL/functions/CLAbsoluteDifference.h | 50 + arm_compute/runtime/CL/functions/CLAccumulate.h | 73 + .../runtime/CL/functions/CLActivationLayer.h | 51 + .../runtime/CL/functions/CLArithmeticAddition.h | 52 + .../runtime/CL/functions/CLArithmeticSubtraction.h | 53 + .../CL/functions/CLBatchNormalizationLayer.h | 67 + arm_compute/runtime/CL/functions/CLBitwiseAnd.h | 50 + arm_compute/runtime/CL/functions/CLBitwiseNot.h | 49 + arm_compute/runtime/CL/functions/CLBitwiseOr.h | 50 + arm_compute/runtime/CL/functions/CLBitwiseXor.h | 50 + arm_compute/runtime/CL/functions/CLBox3x3.h | 55 + arm_compute/runtime/CL/functions/CLCannyEdge.h | 85 + .../runtime/CL/functions/CLChannelCombine.h | 58 + .../runtime/CL/functions/CLChannelExtract.h | 56 + arm_compute/runtime/CL/functions/CLColorConvert.h | 68 + arm_compute/runtime/CL/functions/CLConvolution.h | 128 + .../runtime/CL/functions/CLConvolutionLayer.h | 121 + .../runtime/CL/functions/CLDepthConcatenate.h | 69 + arm_compute/runtime/CL/functions/CLDepthConvert.h | 60 + 
arm_compute/runtime/CL/functions/CLDerivative.h | 59 + arm_compute/runtime/CL/functions/CLDilate.h | 55 + .../runtime/CL/functions/CLEqualizeHistogram.h | 72 + arm_compute/runtime/CL/functions/CLErode.h | 55 + arm_compute/runtime/CL/functions/CLFastCorners.h | 88 + arm_compute/runtime/CL/functions/CLFillBorder.h | 49 + .../runtime/CL/functions/CLFullyConnectedLayer.h | 120 + arm_compute/runtime/CL/functions/CLGEMM.h | 84 + .../runtime/CL/functions/CLGEMMInterleave4x4.h | 50 + arm_compute/runtime/CL/functions/CLGEMMLowp.h | 85 + arm_compute/runtime/CL/functions/CLGaussian3x3.h | 55 + arm_compute/runtime/CL/functions/CLGaussian5x5.h | 70 + .../runtime/CL/functions/CLGaussianPyramid.h | 119 + arm_compute/runtime/CL/functions/CLHOGDescriptor.h | 72 + arm_compute/runtime/CL/functions/CLHOGDetector.h | 78 + arm_compute/runtime/CL/functions/CLHOGGradient.h | 72 + .../runtime/CL/functions/CLHOGMultiDetection.h | 105 + arm_compute/runtime/CL/functions/CLHarrisCorners.h | 104 + arm_compute/runtime/CL/functions/CLHistogram.h | 68 + arm_compute/runtime/CL/functions/CLIntegralImage.h | 60 + .../runtime/CL/functions/CLLaplacianPyramid.h | 85 + .../runtime/CL/functions/CLLaplacianReconstruct.h | 91 + .../runtime/CL/functions/CLLocallyConnectedLayer.h | 79 + arm_compute/runtime/CL/functions/CLMagnitude.h | 48 + arm_compute/runtime/CL/functions/CLMeanStdDev.h | 56 + arm_compute/runtime/CL/functions/CLMedian3x3.h | 55 + .../runtime/CL/functions/CLMinMaxLocation.h | 86 + .../runtime/CL/functions/CLNonLinearFilter.h | 61 + .../CL/functions/CLNonMaximaSuppression3x3.h | 55 + .../runtime/CL/functions/CLNormalizationLayer.h | 71 + arm_compute/runtime/CL/functions/CLOpticalFlow.h | 111 + arm_compute/runtime/CL/functions/CLPhase.h | 48 + .../CL/functions/CLPixelWiseMultiplication.h | 51 + arm_compute/runtime/CL/functions/CLPoolingLayer.h | 52 + arm_compute/runtime/CL/functions/CLRemap.h | 59 + arm_compute/runtime/CL/functions/CLScale.h | 52 + arm_compute/runtime/CL/functions/CLScharr3x3.h 
| 58 + arm_compute/runtime/CL/functions/CLSobel3x3.h | 58 + arm_compute/runtime/CL/functions/CLSobel5x5.h | 74 + arm_compute/runtime/CL/functions/CLSobel7x7.h | 74 + arm_compute/runtime/CL/functions/CLSoftmaxLayer.h | 69 + arm_compute/runtime/CL/functions/CLTableLookup.h | 47 + arm_compute/runtime/CL/functions/CLThreshold.h | 55 + arm_compute/runtime/CL/functions/CLTranspose.h | 50 + arm_compute/runtime/CL/functions/CLWarpAffine.h | 52 + .../runtime/CL/functions/CLWarpPerspective.h | 52 + arm_compute/runtime/CPP/CPPScheduler.h | 73 + arm_compute/runtime/Distribution1D.h | 55 + arm_compute/runtime/HOG.h | 56 + arm_compute/runtime/IFunction.h | 54 + arm_compute/runtime/ILutAllocator.h | 84 + arm_compute/runtime/IScheduler.h | 55 + arm_compute/runtime/ITensorAllocator.h | 93 + arm_compute/runtime/Lut.h | 68 + arm_compute/runtime/LutAllocator.h | 58 + arm_compute/runtime/MultiHOG.h | 58 + arm_compute/runtime/MultiImage.h | 96 + arm_compute/runtime/NEON/INESimpleFunction.h | 50 + arm_compute/runtime/NEON/NEFunctions.h | 96 + arm_compute/runtime/NEON/NEScheduler.h | 33 + .../runtime/NEON/functions/NEAbsoluteDifference.h | 50 + arm_compute/runtime/NEON/functions/NEAccumulate.h | 74 + .../runtime/NEON/functions/NEActivationLayer.h | 51 + .../runtime/NEON/functions/NEArithmeticAddition.h | 48 + .../NEON/functions/NEArithmeticSubtraction.h | 48 + .../NEON/functions/NEBatchNormalizationLayer.h | 66 + arm_compute/runtime/NEON/functions/NEBitwiseAnd.h | 46 + arm_compute/runtime/NEON/functions/NEBitwiseNot.h | 45 + arm_compute/runtime/NEON/functions/NEBitwiseOr.h | 46 + arm_compute/runtime/NEON/functions/NEBitwiseXor.h | 46 + arm_compute/runtime/NEON/functions/NEBox3x3.h | 58 + arm_compute/runtime/NEON/functions/NECannyEdge.h | 97 + .../runtime/NEON/functions/NEChannelCombine.h | 58 + .../runtime/NEON/functions/NEChannelExtract.h | 56 + .../runtime/NEON/functions/NEColorConvert.h | 65 + arm_compute/runtime/NEON/functions/NEConvolution.h | 128 + 
.../runtime/NEON/functions/NEConvolutionLayer.h | 115 + .../runtime/NEON/functions/NEDepthConcatenate.h | 66 + .../runtime/NEON/functions/NEDepthConvert.h | 67 + arm_compute/runtime/NEON/functions/NEDerivative.h | 70 + arm_compute/runtime/NEON/functions/NEDilate.h | 55 + .../NEON/functions/NEDirectConvolutionLayer.h | 72 + .../runtime/NEON/functions/NEEqualizeHistogram.h | 77 + arm_compute/runtime/NEON/functions/NEErode.h | 55 + arm_compute/runtime/NEON/functions/NEFastCorners.h | 80 + arm_compute/runtime/NEON/functions/NEFillBorder.h | 58 + .../runtime/NEON/functions/NEFullyConnectedLayer.h | 119 + arm_compute/runtime/NEON/functions/NEGEMM.h | 78 + .../runtime/NEON/functions/NEGEMMInterleave4x4.h | 49 + arm_compute/runtime/NEON/functions/NEGEMMLowp.h | 85 + .../runtime/NEON/functions/NEGEMMTranspose1xW.h | 47 + arm_compute/runtime/NEON/functions/NEGaussian3x3.h | 55 + arm_compute/runtime/NEON/functions/NEGaussian5x5.h | 71 + .../runtime/NEON/functions/NEGaussianPyramid.h | 122 + .../runtime/NEON/functions/NEHOGDescriptor.h | 71 + arm_compute/runtime/NEON/functions/NEHOGDetector.h | 57 + arm_compute/runtime/NEON/functions/NEHOGGradient.h | 72 + .../runtime/NEON/functions/NEHOGMultiDetection.h | 105 + .../runtime/NEON/functions/NEHarrisCorners.h | 103 + arm_compute/runtime/NEON/functions/NEHistogram.h | 63 + .../runtime/NEON/functions/NEIntegralImage.h | 45 + .../runtime/NEON/functions/NELaplacianPyramid.h | 85 + .../NEON/functions/NELaplacianReconstruct.h | 91 + .../NEON/functions/NELocallyConnectedLayer.h | 79 + arm_compute/runtime/NEON/functions/NEMagnitude.h | 47 + arm_compute/runtime/NEON/functions/NEMeanStdDev.h | 62 + arm_compute/runtime/NEON/functions/NEMedian3x3.h | 56 + .../runtime/NEON/functions/NEMinMaxLocation.h | 71 + .../runtime/NEON/functions/NENonLinearFilter.h | 61 + .../NEON/functions/NENonMaximaSuppression3x3.h | 56 + .../runtime/NEON/functions/NENormalizationLayer.h | 71 + arm_compute/runtime/NEON/functions/NEOpticalFlow.h | 95 + 
arm_compute/runtime/NEON/functions/NEPhase.h | 46 + .../NEON/functions/NEPixelWiseMultiplication.h | 50 + .../runtime/NEON/functions/NEPoolingLayer.h | 52 + arm_compute/runtime/NEON/functions/NERemap.h | 60 + arm_compute/runtime/NEON/functions/NEScale.h | 62 + arm_compute/runtime/NEON/functions/NEScharr3x3.h | 59 + arm_compute/runtime/NEON/functions/NESobel3x3.h | 59 + arm_compute/runtime/NEON/functions/NESobel5x5.h | 75 + arm_compute/runtime/NEON/functions/NESobel7x7.h | 75 + .../runtime/NEON/functions/NESoftmaxLayer.h | 71 + arm_compute/runtime/NEON/functions/NETableLookup.h | 47 + arm_compute/runtime/NEON/functions/NEThreshold.h | 54 + arm_compute/runtime/NEON/functions/NETranspose.h | 51 + arm_compute/runtime/NEON/functions/NEWarpAffine.h | 52 + .../runtime/NEON/functions/NEWarpPerspective.h | 52 + arm_compute/runtime/OMP/OMPScheduler.h | 68 + arm_compute/runtime/Pyramid.h | 76 + arm_compute/runtime/Scheduler.h | 77 + arm_compute/runtime/SingleThreadScheduler.h | 62 + arm_compute/runtime/SubTensor.h | 73 + arm_compute/runtime/Tensor.h | 65 + arm_compute/runtime/TensorAllocator.h | 90 + arm_compute/runtime/Utils.h | 41 + data | 1 + docs/00_introduction.dox | 514 + docs/01_library.dox | 250 + docs/02_tests.dox | 93 + docs/Doxyfile | 2458 ++++ docs/header.html | 56 + examples/SConscript | 70 + examples/cl_convolution.cpp | 118 + examples/cl_events.cpp | 114 + examples/neon_cnn.cpp | 230 + examples/neon_convolution.cpp | 117 + examples/neon_copy_objects.cpp | 152 + examples/neon_scale.cpp | 90 + examples/neoncl_scale_median_gaussian.cpp | 126 + include/CL/cl.h | 1214 ++ include/CL/cl.hpp | 12452 +++++++++++++++++++ include/CL/cl2.hpp | 9526 ++++++++++++++ include/CL/cl_d3d10.h | 126 + include/CL/cl_d3d11.h | 126 + include/CL/cl_dx9_media_sharing.h | 127 + include/CL/cl_egl.h | 131 + include/CL/cl_ext.h | 316 + include/CL/cl_gl.h | 162 + include/CL/cl_gl_ext.h | 69 + include/CL/cl_platform.h | 1254 ++ include/CL/opencl.h | 54 + opencl-1.2-stubs/SConscript | 7 + 
opencl-1.2-stubs/opencl_stubs.c | 1002 ++ scripts/add_copyright.py | 83 + scripts/check_bad_style.sh | 71 + scripts/check_clang-tidy.py | 59 + scripts/clang-tidy.h | 112 + scripts/clang-tidy.sh | 91 + scripts/copyright_eula.txt | 19 + scripts/copyright_mit.txt | 21 + scripts/fix_code_formatting.sh | 33 + scripts/format_doxygen.py | 151 + scripts/include_functions_kernels.py | 64 + src/core/AccessWindowAutoPadding.cpp | 85 + src/core/AccessWindowStatic.cpp | 202 + src/core/AccessWindowTranspose.cpp | 209 + src/core/CL/CLHelpers.cpp | 165 + src/core/CL/CLKernelLibrary.cpp | 597 + src/core/CL/ICLDistribution1D.cpp | 51 + src/core/CL/ICLHOG.cpp | 47 + src/core/CL/ICLKernel.cpp | 154 + src/core/CL/ICLLut.cpp | 47 + src/core/CL/ICLMultiHOG.cpp | 38 + src/core/CL/ICLMultiImage.cpp | 39 + src/core/CL/ICLSimple2DKernel.cpp | 48 + src/core/CL/ICLSimple3DKernel.cpp | 47 + src/core/CL/ICLSimpleKernel.cpp | 54 + src/core/CL/ICLTensor.cpp | 56 + src/core/CL/OpenCL.cpp | 586 + src/core/CL/cl_kernels/absdiff.cl | 65 + src/core/CL/cl_kernels/accumulate.cl | 130 + src/core/CL/cl_kernels/activation_layer.cl | 89 + src/core/CL/cl_kernels/arithmetic_op.cl | 122 + src/core/CL/cl_kernels/batchnormalization_layer.cl | 99 + src/core/CL/cl_kernels/bitwise_op.cl | 159 + src/core/CL/cl_kernels/canny.cl | 429 + src/core/CL/cl_kernels/channel_combine.cl | 416 + src/core/CL/cl_kernels/channel_extract.cl | 272 + src/core/CL/cl_kernels/color_convert.cl | 1823 +++ src/core/CL/cl_kernels/concatenate.cl | 53 + src/core/CL/cl_kernels/convolution3x3.cl | 138 + src/core/CL/cl_kernels/convolution5x5.cl | 289 + src/core/CL/cl_kernels/convolution7x7.cl | 340 + src/core/CL/cl_kernels/convolution9x9.cl | 406 + src/core/CL/cl_kernels/convolution_layer.cl | 241 + src/core/CL/cl_kernels/convolution_rectangle.cl | 118 + src/core/CL/cl_kernels/depth_convert.cl | 98 + src/core/CL/cl_kernels/derivative.cl | 80 + src/core/CL/cl_kernels/dilate.cl | 56 + src/core/CL/cl_kernels/erode.cl | 56 + 
src/core/CL/cl_kernels/fast_corners.cl | 260 + src/core/CL/cl_kernels/fill_border.cl | 161 + src/core/CL/cl_kernels/gaussian_pyramid.cl | 113 + src/core/CL/cl_kernels/gemm.cl | 1099 ++ src/core/CL/cl_kernels/harris_corners.cl | 376 + src/core/CL/cl_kernels/helpers.h | 218 + src/core/CL/cl_kernels/histogram.cl | 243 + src/core/CL/cl_kernels/hog.cl | 455 + src/core/CL/cl_kernels/integral_image.cl | 100 + src/core/CL/cl_kernels/magnitude_phase.cl | 162 + src/core/CL/cl_kernels/mean_stddev.cl | 84 + src/core/CL/cl_kernels/minmaxloc.cl | 164 + src/core/CL/cl_kernels/non_linear_filter3x3.cl | 186 + src/core/CL/cl_kernels/non_linear_filter5x5.cl | 479 + src/core/CL/cl_kernels/non_linear_filter_helpers.h | 145 + src/core/CL/cl_kernels/nonmax.cl | 70 + src/core/CL/cl_kernels/normalization_layer.cl | 154 + src/core/CL/cl_kernels/optical_flow_pyramid_lk.cl | 522 + src/core/CL/cl_kernels/pixelwise_mul_float.cl | 89 + src/core/CL/cl_kernels/pixelwise_mul_int.cl | 79 + src/core/CL/cl_kernels/pooling_layer.cl | 159 + src/core/CL/cl_kernels/remap.cl | 132 + src/core/CL/cl_kernels/scale.cl | 123 + src/core/CL/cl_kernels/scharr_filter.cl | 124 + src/core/CL/cl_kernels/sobel_filter.cl | 541 + src/core/CL/cl_kernels/softmax_layer.cl | 221 + src/core/CL/cl_kernels/tablelookup.cl | 114 + src/core/CL/cl_kernels/threshold.cl | 104 + src/core/CL/cl_kernels/transpose.cl | 217 + src/core/CL/cl_kernels/types.h | 56 + src/core/CL/cl_kernels/warp_affine.cl | 120 + src/core/CL/cl_kernels/warp_helpers.h | 111 + src/core/CL/cl_kernels/warp_perspective.cl | 128 + src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp | 102 + src/core/CL/kernels/CLAccumulateKernel.cpp | 83 + src/core/CL/kernels/CLActivationLayerKernel.cpp | 64 + src/core/CL/kernels/CLArithmeticAdditionKernel.cpp | 111 + .../CL/kernels/CLArithmeticSubtractionKernel.cpp | 111 + .../CL/kernels/CLBatchNormalizationLayerKernel.cpp | 115 + src/core/CL/kernels/CLBitwiseAndKernel.cpp | 88 + src/core/CL/kernels/CLBitwiseNotKernel.cpp | 48 + 
src/core/CL/kernels/CLBitwiseOrKernel.cpp | 89 + src/core/CL/kernels/CLBitwiseXorKernel.cpp | 89 + src/core/CL/kernels/CLBox3x3Kernel.cpp | 77 + src/core/CL/kernels/CLCannyEdgeKernel.cpp | 255 + src/core/CL/kernels/CLChannelCombineKernel.cpp | 244 + src/core/CL/kernels/CLChannelExtractKernel.cpp | 148 + src/core/CL/kernels/CLCol2ImKernel.cpp | 85 + src/core/CL/kernels/CLColorConvertKernel.cpp | 476 + src/core/CL/kernels/CLConvolutionKernel.cpp | 330 + src/core/CL/kernels/CLDepthConcatenateKernel.cpp | 113 + src/core/CL/kernels/CLDepthConvertKernel.cpp | 99 + src/core/CL/kernels/CLDerivativeKernel.cpp | 145 + src/core/CL/kernels/CLDilateKernel.cpp | 65 + src/core/CL/kernels/CLErodeKernel.cpp | 65 + src/core/CL/kernels/CLFastCornersKernel.cpp | 172 + src/core/CL/kernels/CLFillBorderKernel.cpp | 175 + src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp | 106 + .../CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp | 122 + .../kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp | 92 + src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp | 92 + src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp | 168 + src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp | 129 + src/core/CL/kernels/CLGaussian3x3Kernel.cpp | 76 + src/core/CL/kernels/CLGaussian5x5Kernel.cpp | 45 + src/core/CL/kernels/CLGaussianPyramidKernel.cpp | 218 + src/core/CL/kernels/CLHOGDescriptorKernel.cpp | 200 + src/core/CL/kernels/CLHOGDetectorKernel.cpp | 130 + src/core/CL/kernels/CLHarrisCornersKernel.cpp | 126 + src/core/CL/kernels/CLHistogramKernel.cpp | 224 + src/core/CL/kernels/CLIm2ColKernel.cpp | 202 + src/core/CL/kernels/CLIntegralImageKernel.cpp | 112 + src/core/CL/kernels/CLLKTrackerKernel.cpp | 285 + .../CLLocallyConnectedMatrixMultiplyKernel.cpp | 116 + src/core/CL/kernels/CLMagnitudePhaseKernel.cpp | 168 + src/core/CL/kernels/CLMeanStdDevKernel.cpp | 134 + src/core/CL/kernels/CLMedian3x3Kernel.cpp | 66 + src/core/CL/kernels/CLMinMaxLocationKernel.cpp | 169 + src/core/CL/kernels/CLNonLinearFilterKernel.cpp | 98 + 
.../CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp | 72 + src/core/CL/kernels/CLNormalizationLayerKernel.cpp | 111 + .../CL/kernels/CLPixelWiseMultiplicationKernel.cpp | 154 + src/core/CL/kernels/CLPoolingLayerKernel.cpp | 180 + src/core/CL/kernels/CLRemapKernel.cpp | 108 + src/core/CL/kernels/CLScaleKernel.cpp | 99 + src/core/CL/kernels/CLScharr3x3Kernel.cpp | 132 + src/core/CL/kernels/CLSobel3x3Kernel.cpp | 133 + src/core/CL/kernels/CLSobel5x5Kernel.cpp | 234 + src/core/CL/kernels/CLSobel7x7Kernel.cpp | 238 + src/core/CL/kernels/CLSoftmaxLayerKernel.cpp | 216 + src/core/CL/kernels/CLTableLookupKernel.cpp | 63 + src/core/CL/kernels/CLThresholdKernel.cpp | 76 + src/core/CL/kernels/CLTransposeKernel.cpp | 82 + src/core/CL/kernels/CLWarpAffineKernel.cpp | 99 + src/core/CL/kernels/CLWarpPerspectiveKernel.cpp | 99 + src/core/CL/kernels/CLWeightsReshapeKernel.cpp | 163 + src/core/CPP/ICPPSimpleKernel.cpp | 53 + src/core/CPP/kernels/CPPCornerCandidatesKernel.cpp | 110 + ...PPDetectionWindowNonMaximaSuppressionKernel.cpp | 120 + .../CPP/kernels/CPPSortEuclideanDistanceKernel.cpp | 115 + src/core/Error.cpp | 52 + src/core/HOGInfo.cpp | 122 + src/core/Helpers.cpp | 164 + src/core/IAccessWindow.cpp | 221 + src/core/IDistribution.cpp | 36 + src/core/IDistribution1D.cpp | 69 + src/core/IKernel.cpp | 54 + src/core/ITensor.cpp | 150 + src/core/MultiImageInfo.cpp | 53 + .../NEON/kernels/NEAbsoluteDifferenceKernel.cpp | 211 + src/core/NEON/kernels/NEAccumulateKernel.cpp | 357 + src/core/NEON/kernels/NEActivationLayerKernel.cpp | 302 + .../NEON/kernels/NEArithmeticAdditionKernel.cpp | 378 + .../NEON/kernels/NEArithmeticSubtractionKernel.cpp | 371 + .../kernels/NEBatchNormalizationLayerKernel.cpp | 187 + src/core/NEON/kernels/NEBitwiseAndKernel.cpp | 109 + src/core/NEON/kernels/NEBitwiseNotKernel.cpp | 96 + src/core/NEON/kernels/NEBitwiseOrKernel.cpp | 109 + src/core/NEON/kernels/NEBitwiseXorKernel.cpp | 105 + src/core/NEON/kernels/NEBox3x3Kernel.cpp | 220 + 
src/core/NEON/kernels/NECannyEdgeKernel.cpp | 1856 +++ src/core/NEON/kernels/NEChannelCombineKernel.cpp | 467 + src/core/NEON/kernels/NEChannelExtractKernel.cpp | 354 + src/core/NEON/kernels/NECol2ImKernel.cpp | 124 + src/core/NEON/kernels/NEColorConvertKernel.cpp | 582 + src/core/NEON/kernels/NEConvolutionKernel.cpp | 1618 +++ .../kernels/NECumulativeDistributionKernel.cpp | 110 + src/core/NEON/kernels/NEDepthConcatenateKernel.cpp | 105 + src/core/NEON/kernels/NEDepthConvertKernel.cpp | 384 + src/core/NEON/kernels/NEDerivativeKernel.cpp | 224 + src/core/NEON/kernels/NEDilateKernel.cpp | 126 + ...EDirectConvolutionLayerBiasAccumulateKernel.cpp | 207 + .../kernels/NEDirectConvolutionLayerKernel.cpp | 817 ++ src/core/NEON/kernels/NEErodeKernel.cpp | 126 + src/core/NEON/kernels/NEFastCornersKernel.cpp | 474 + src/core/NEON/kernels/NEFillArrayKernel.cpp | 91 + src/core/NEON/kernels/NEFillBorderKernel.cpp | 259 + src/core/NEON/kernels/NEFillInnerBorderKernel.cpp | 137 + .../NEON/kernels/NEGEMMInterleave4x4Kernel.cpp | 191 + .../kernels/NEGEMMLowpMatrixMultiplyKernel.cpp | 423 + .../kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp | 128 + .../NEON/kernels/NEGEMMMatrixAdditionKernel.cpp | 202 + .../NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp | 1168 ++ src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp | 150 + src/core/NEON/kernels/NEGaussian3x3Kernel.cpp | 132 + src/core/NEON/kernels/NEGaussian5x5Kernel.cpp | 203 + src/core/NEON/kernels/NEGaussianPyramidKernel.cpp | 279 + src/core/NEON/kernels/NEHOGDescriptorKernel.cpp | 802 ++ src/core/NEON/kernels/NEHOGDetectorKernel.cpp | 186 + src/core/NEON/kernels/NEHarrisCornersKernel.cpp | 1137 ++ src/core/NEON/kernels/NEHistogramKernel.cpp | 252 + src/core/NEON/kernels/NEIm2ColKernel.cpp | 338 + src/core/NEON/kernels/NEIntegralImageKernel.cpp | 141 + src/core/NEON/kernels/NELKTrackerKernel.cpp | 533 + .../NELocallyConnectedMatrixMultiplyKernel.cpp | 226 + src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp | 869 ++ 
src/core/NEON/kernels/NEMeanStdDevKernel.cpp | 152 + src/core/NEON/kernels/NEMedian3x3Kernel.cpp | 135 + src/core/NEON/kernels/NEMinMaxLocationKernel.cpp | 361 + src/core/NEON/kernels/NENonLinearFilterKernel.cpp | 1009 ++ .../kernels/NENonMaximaSuppression3x3Kernel.cpp | 513 + .../NEON/kernels/NENormalizationLayerKernel.cpp | 218 + .../kernels/NEPixelWiseMultiplicationKernel.cpp | 524 + src/core/NEON/kernels/NEPoolingLayerKernel.cpp | 415 + src/core/NEON/kernels/NERemapKernel.cpp | 226 + src/core/NEON/kernels/NEScaleKernel.cpp | 359 + src/core/NEON/kernels/NEScharr3x3Kernel.cpp | 259 + src/core/NEON/kernels/NESobel3x3Kernel.cpp | 269 + src/core/NEON/kernels/NESobel5x5Kernel.cpp | 402 + src/core/NEON/kernels/NESobel7x7Kernel.cpp | 520 + src/core/NEON/kernels/NESoftmaxLayerKernel.cpp | 474 + src/core/NEON/kernels/NETableLookupKernel.cpp | 142 + src/core/NEON/kernels/NEThresholdKernel.cpp | 129 + src/core/NEON/kernels/NETransposeKernel.cpp | 241 + src/core/NEON/kernels/NEWarpKernel.cpp | 651 + src/core/NEON/kernels/NEWeightsReshapeKernel.cpp | 175 + src/core/PyramidInfo.cpp | 105 + src/core/SubTensorInfo.cpp | 78 + src/core/TensorInfo.cpp | 377 + src/core/Utils.cpp | 329 + src/core/Validate.cpp | 215 + src/runtime/CL/CLDistribution1D.cpp | 61 + src/runtime/CL/CLHOG.cpp | 84 + src/runtime/CL/CLLut.cpp | 99 + src/runtime/CL/CLLutAllocator.cpp | 77 + src/runtime/CL/CLMultiHOG.cpp | 52 + src/runtime/CL/CLMultiImage.cpp | 168 + src/runtime/CL/CLPyramid.cpp | 130 + src/runtime/CL/CLScheduler.cpp | 49 + src/runtime/CL/CLSubTensor.cpp | 81 + src/runtime/CL/CLTensor.cpp | 73 + src/runtime/CL/CLTensorAllocator.cpp | 87 + src/runtime/CL/ICLSimpleFunction.cpp | 42 + src/runtime/CL/functions/CLAbsoluteDifference.cpp | 38 + src/runtime/CL/functions/CLAccumulate.cpp | 52 + src/runtime/CL/functions/CLActivationLayer.cpp | 36 + src/runtime/CL/functions/CLArithmeticAddition.cpp | 38 + .../CL/functions/CLArithmeticSubtraction.cpp | 38 + .../CL/functions/CLBatchNormalizationLayer.cpp | 
48 + src/runtime/CL/functions/CLBitwiseAnd.cpp | 38 + src/runtime/CL/functions/CLBitwiseNot.cpp | 38 + src/runtime/CL/functions/CLBitwiseOr.cpp | 38 + src/runtime/CL/functions/CLBitwiseXor.cpp | 38 + src/runtime/CL/functions/CLBox3x3.cpp | 40 + src/runtime/CL/functions/CLCannyEdge.cpp | 155 + src/runtime/CL/functions/CLChannelCombine.cpp | 45 + src/runtime/CL/functions/CLChannelExtract.cpp | 45 + src/runtime/CL/functions/CLColorConvert.cpp | 59 + src/runtime/CL/functions/CLConvolution.cpp | 114 + src/runtime/CL/functions/CLConvolutionLayer.cpp | 247 + src/runtime/CL/functions/CLDepthConcatenate.cpp | 71 + src/runtime/CL/functions/CLDepthConvert.cpp | 38 + src/runtime/CL/functions/CLDerivative.cpp | 40 + src/runtime/CL/functions/CLDilate.cpp | 40 + src/runtime/CL/functions/CLEqualizeHistogram.cpp | 110 + src/runtime/CL/functions/CLErode.cpp | 40 + src/runtime/CL/functions/CLFastCorners.cpp | 127 + src/runtime/CL/functions/CLFillBorder.cpp | 38 + src/runtime/CL/functions/CLFullyConnectedLayer.cpp | 343 + src/runtime/CL/functions/CLGEMM.cpp | 145 + src/runtime/CL/functions/CLGEMMInterleave4x4.cpp | 36 + src/runtime/CL/functions/CLGEMMLowp.cpp | 85 + src/runtime/CL/functions/CLGaussian3x3.cpp | 40 + src/runtime/CL/functions/CLGaussian5x5.cpp | 62 + src/runtime/CL/functions/CLGaussianPyramid.cpp | 183 + src/runtime/CL/functions/CLHOGDescriptor.cpp | 99 + src/runtime/CL/functions/CLHOGDetector.cpp | 69 + src/runtime/CL/functions/CLHOGGradient.cpp | 75 + src/runtime/CL/functions/CLHOGMultiDetection.cpp | 240 + src/runtime/CL/functions/CLHarrisCorners.cpp | 157 + src/runtime/CL/functions/CLHistogram.cpp | 45 + src/runtime/CL/functions/CLIntegralImage.cpp | 46 + src/runtime/CL/functions/CLLaplacianPyramid.cpp | 99 + .../CL/functions/CLLaplacianReconstruct.cpp | 99 + .../CL/functions/CLLocallyConnectedLayer.cpp | 131 + src/runtime/CL/functions/CLMagnitude.cpp | 38 + src/runtime/CL/functions/CLMeanStdDev.cpp | 53 + src/runtime/CL/functions/CLMedian3x3.cpp | 40 + 
src/runtime/CL/functions/CLMinMaxLocation.cpp | 98 + src/runtime/CL/functions/CLNonLinearFilter.cpp | 40 + .../CL/functions/CLNonMaximaSuppression3x3.cpp | 47 + src/runtime/CL/functions/CLNormalizationLayer.cpp | 60 + src/runtime/CL/functions/CLOpticalFlow.cpp | 150 + src/runtime/CL/functions/CLPhase.cpp | 38 + .../CL/functions/CLPixelWiseMultiplication.cpp | 39 + src/runtime/CL/functions/CLPoolingLayer.cpp | 41 + src/runtime/CL/functions/CLRemap.cpp | 50 + src/runtime/CL/functions/CLScale.cpp | 45 + src/runtime/CL/functions/CLScharr3x3.cpp | 40 + src/runtime/CL/functions/CLSobel3x3.cpp | 40 + src/runtime/CL/functions/CLSobel5x5.cpp | 81 + src/runtime/CL/functions/CLSobel7x7.cpp | 81 + src/runtime/CL/functions/CLSoftmaxLayer.cpp | 67 + src/runtime/CL/functions/CLTableLookup.cpp | 38 + src/runtime/CL/functions/CLThreshold.cpp | 38 + src/runtime/CL/functions/CLTranspose.cpp | 38 + src/runtime/CL/functions/CLWarpAffine.cpp | 40 + src/runtime/CL/functions/CLWarpPerspective.cpp | 40 + src/runtime/CPP/CPPScheduler.cpp | 225 + src/runtime/CPP/SingleThreadScheduler.cpp | 52 + src/runtime/Distribution1D.cpp | 42 + src/runtime/HOG.cpp | 51 + src/runtime/ILutAllocator.cpp | 58 + src/runtime/ITensorAllocator.cpp | 51 + src/runtime/Lut.cpp | 75 + src/runtime/LutAllocator.cpp | 52 + src/runtime/MultiHOG.cpp | 52 + src/runtime/MultiImage.cpp | 220 + src/runtime/NEON/INESimpleFunction.cpp | 39 + .../NEON/functions/NEAbsoluteDifference.cpp | 38 + src/runtime/NEON/functions/NEAccumulate.cpp | 61 + src/runtime/NEON/functions/NEActivationLayer.cpp | 36 + .../NEON/functions/NEArithmeticAddition.cpp | 38 + .../NEON/functions/NEArithmeticSubtraction.cpp | 38 + .../NEON/functions/NEBatchNormalizationLayer.cpp | 49 + src/runtime/NEON/functions/NEBitwiseAnd.cpp | 38 + src/runtime/NEON/functions/NEBitwiseNot.cpp | 38 + src/runtime/NEON/functions/NEBitwiseOr.cpp | 38 + src/runtime/NEON/functions/NEBitwiseXor.cpp | 38 + src/runtime/NEON/functions/NEBox3x3.cpp | 49 + 
src/runtime/NEON/functions/NECannyEdge.cpp | 169 + src/runtime/NEON/functions/NEChannelCombine.cpp | 45 + src/runtime/NEON/functions/NEChannelExtract.cpp | 45 + src/runtime/NEON/functions/NEColorConvert.cpp | 59 + src/runtime/NEON/functions/NEConvolution.cpp | 120 + src/runtime/NEON/functions/NEConvolutionLayer.cpp | 246 + src/runtime/NEON/functions/NEDepthConcatenate.cpp | 67 + src/runtime/NEON/functions/NEDepthConvert.cpp | 44 + src/runtime/NEON/functions/NEDerivative.cpp | 52 + src/runtime/NEON/functions/NEDilate.cpp | 40 + .../NEON/functions/NEDirectConvolutionLayer.cpp | 75 + src/runtime/NEON/functions/NEEqualizeHistogram.cpp | 62 + src/runtime/NEON/functions/NEErode.cpp | 40 + src/runtime/NEON/functions/NEFastCorners.cpp | 101 + src/runtime/NEON/functions/NEFillBorder.cpp | 39 + .../NEON/functions/NEFullyConnectedLayer.cpp | 344 + src/runtime/NEON/functions/NEGEMM.cpp | 156 + src/runtime/NEON/functions/NEGEMMInterleave4x4.cpp | 36 + src/runtime/NEON/functions/NEGEMMLowp.cpp | 84 + src/runtime/NEON/functions/NEGEMMTranspose1xW.cpp | 40 + src/runtime/NEON/functions/NEGaussian3x3.cpp | 40 + src/runtime/NEON/functions/NEGaussian5x5.cpp | 60 + src/runtime/NEON/functions/NEGaussianPyramid.cpp | 183 + src/runtime/NEON/functions/NEHOGDescriptor.cpp | 99 + src/runtime/NEON/functions/NEHOGDetector.cpp | 36 + src/runtime/NEON/functions/NEHOGGradient.cpp | 80 + src/runtime/NEON/functions/NEHOGMultiDetection.cpp | 231 + src/runtime/NEON/functions/NEHarrisCorners.cpp | 212 + src/runtime/NEON/functions/NEHistogram.cpp | 58 + src/runtime/NEON/functions/NEIntegralImage.cpp | 40 + src/runtime/NEON/functions/NELaplacianPyramid.cpp | 102 + .../NEON/functions/NELaplacianReconstruct.cpp | 100 + .../NEON/functions/NELocallyConnectedLayer.cpp | 131 + src/runtime/NEON/functions/NEMagnitude.cpp | 48 + src/runtime/NEON/functions/NEMeanStdDev.cpp | 47 + src/runtime/NEON/functions/NEMedian3x3.cpp | 40 + src/runtime/NEON/functions/NEMinMaxLocation.cpp | 50 + 
src/runtime/NEON/functions/NENonLinearFilter.cpp | 42 + .../NEON/functions/NENonMaximaSuppression3x3.cpp | 47 + .../NEON/functions/NENormalizationLayer.cpp | 61 + src/runtime/NEON/functions/NEOpticalFlow.cpp | 119 + src/runtime/NEON/functions/NEPhase.cpp | 38 + .../NEON/functions/NEPixelWiseMultiplication.cpp | 38 + src/runtime/NEON/functions/NEPoolingLayer.cpp | 41 + src/runtime/NEON/functions/NERemap.cpp | 53 + src/runtime/NEON/functions/NEScale.cpp | 171 + src/runtime/NEON/functions/NEScharr3x3.cpp | 40 + src/runtime/NEON/functions/NESobel3x3.cpp | 40 + src/runtime/NEON/functions/NESobel5x5.cpp | 81 + src/runtime/NEON/functions/NESobel7x7.cpp | 81 + src/runtime/NEON/functions/NESoftmaxLayer.cpp | 72 + src/runtime/NEON/functions/NETableLookup.cpp | 38 + src/runtime/NEON/functions/NEThreshold.cpp | 38 + src/runtime/NEON/functions/NETranspose.cpp | 38 + src/runtime/NEON/functions/NEWarpAffine.cpp | 62 + src/runtime/NEON/functions/NEWarpPerspective.cpp | 62 + src/runtime/OMP/OMPScheduler.cpp | 83 + src/runtime/Pyramid.cpp | 120 + src/runtime/Scheduler.cpp | 149 + src/runtime/SubTensor.cpp | 57 + src/runtime/Tensor.cpp | 51 + src/runtime/TensorAllocator.cpp | 119 + src/runtime/Utils.cpp | 42 + tests/CL/CLAccessor.h | 136 + tests/CL/Helper.h | 76 + tests/CMakeLists.txt | 85 + tests/Globals.h | 38 + tests/IAccessor.h | 89 + tests/NEON/Helper.h | 77 + tests/NEON/NEAccessor.h | 124 + tests/ProgramOptions.cpp | 88 + tests/ProgramOptions.h | 101 + tests/RawTensor.cpp | 180 + tests/RawTensor.h | 159 + tests/SConscript | 150 + tests/TensorCache.h | 118 + tests/TensorLibrary.cpp | 475 + tests/TensorLibrary.h | 656 + tests/TypePrinter.h | 403 + tests/TypeReader.h | 67 + tests/Types.h | 37 + tests/UserConfiguration.cpp | 55 + tests/UserConfiguration.h | 136 + tests/Utils.h | 672 + tests/benchmark/CL/ActivationLayer.cpp | 212 + tests/benchmark/CL/BitwiseAnd.cpp | 133 + tests/benchmark/CL/CMakeLists.txt | 57 + tests/benchmark/CL/ConvolutionLayer.cpp | 277 + 
tests/benchmark/CL/FullyConnectedLayer.cpp | 116 + tests/benchmark/CL/GEMM.cpp | 492 + tests/benchmark/CL/GEMM.h | 102 + tests/benchmark/CL/NormalizationLayer.cpp | 93 + tests/benchmark/CL/PoolingLayer.cpp | 141 + tests/benchmark/CMakeLists.txt | 100 + tests/benchmark/Datasets.h | 79 + tests/benchmark/Instrument.h | 107 + tests/benchmark/NEON/ActivationLayer.cpp | 239 + tests/benchmark/NEON/BitwiseAnd.cpp | 126 + tests/benchmark/NEON/CMakeLists.txt | 37 + tests/benchmark/NEON/ConvolutionLayer.cpp | 303 + tests/benchmark/NEON/ConvolutionLayerDirect.cpp | 74 + tests/benchmark/NEON/FullyConnectedLayer.cpp | 132 + tests/benchmark/NEON/GEMM.cpp | 709 ++ tests/benchmark/NEON/GEMM.h | 106 + tests/benchmark/NEON/NormalizationLayer.cpp | 111 + tests/benchmark/NEON/PoolingLayer.cpp | 162 + tests/benchmark/PMUCounter.cpp | 144 + tests/benchmark/PMUCounter.h | 71 + tests/benchmark/PerformanceProgramOptions.cpp | 48 + tests/benchmark/PerformanceProgramOptions.h | 45 + tests/benchmark/PerformanceUserConfiguration.cpp | 45 + tests/benchmark/PerformanceUserConfiguration.h | 57 + tests/benchmark/Profiler.cpp | 87 + tests/benchmark/Profiler.h | 76 + tests/benchmark/WallClockTimer.cpp | 56 + tests/benchmark/WallClockTimer.h | 53 + tests/benchmark/common/ActivationLayer.h | 92 + tests/benchmark/common/ConvolutionLayer.h | 107 + tests/benchmark/common/FullyConnectedLayer.h | 108 + tests/benchmark/common/NormalizationLayer.h | 96 + tests/benchmark/common/PoolingLayer.h | 95 + tests/benchmark/main.cpp | 96 + tests/benchmark/system_tests/CL/AlexNet.cpp | 87 + tests/benchmark/system_tests/CL/LeNet5.cpp | 82 + tests/benchmark/system_tests/NEON/AlexNet.cpp | 120 + tests/benchmark/system_tests/NEON/LeNet5.cpp | 80 + tests/benchmark/system_tests/common/AlexNet.h | 95 + tests/benchmark/system_tests/common/LeNet5.h | 82 + tests/boost_wrapper.h | 40 + tests/dataset/ActivationFunctionDataset.h | 66 + tests/dataset/ActivationLayerDataset.h | 177 + tests/dataset/BatchNormalizationLayerDataset.h | 90 
+ tests/dataset/BorderModeDataset.h | 82 + tests/dataset/ConvertPolicyDataset.h | 82 + tests/dataset/ConvolutionLayerDataset.h | 269 + tests/dataset/DataTypeDatasets.h | 193 + tests/dataset/FullyConnectedLayerDataset.h | 155 + tests/dataset/GEMMDataset.h | 204 + tests/dataset/GenericDataset.h | 97 + tests/dataset/ImageDatasets.h | 120 + tests/dataset/InterpolationPolicyDataset.h | 80 + tests/dataset/NormalizationLayerDataset.h | 99 + tests/dataset/NormalizationTypeDataset.h | 80 + tests/dataset/PoolingLayerDataset.h | 159 + tests/dataset/RoundingPolicyDataset.h | 82 + tests/dataset/ShapeDatasets.h | 130 + tests/dataset/ThresholdDataset.h | 95 + tests/model_objects/AlexNet.h | 582 + tests/model_objects/LeNet5.h | 277 + tests/validation/CL/BitwiseAnd.cpp | 218 + tests/validation/CL/CLFixture.cpp | 33 + tests/validation/CL/CLFixture.h | 48 + tests/validation/CL/CMakeLists.txt | 48 + tests/validation/CL/DepthConvert.cpp | 413 + tests/validation/CL/FillBorder.cpp | 91 + tests/validation/CL/Threshold.cpp | 154 + tests/validation/CMakeLists.txt | 96 + tests/validation/Datasets.h | 238 + tests/validation/FixedPoint.h | 975 ++ tests/validation/Helpers.h | 123 + tests/validation/NEON/AbsoluteDifference.cpp | 201 + tests/validation/NEON/Accumulate.cpp | 146 + tests/validation/NEON/AccumulateSquared.cpp | 147 + tests/validation/NEON/AccumulateWeighted.cpp | 146 + tests/validation/NEON/ActivationLayer.cpp | 217 + tests/validation/NEON/ArithmeticAddition.cpp | 228 + tests/validation/NEON/ArithmeticSubtraction.cpp | 228 + tests/validation/NEON/BatchNormalizationLayer.cpp | 195 + tests/validation/NEON/BitwiseAnd.cpp | 218 + tests/validation/NEON/BitwiseNot.cpp | 142 + tests/validation/NEON/BitwiseOr.cpp | 150 + tests/validation/NEON/BitwiseXor.cpp | 150 + tests/validation/NEON/Box3x3.cpp | 145 + tests/validation/NEON/CMakeLists.txt | 55 + tests/validation/NEON/ConvolutionLayer.cpp | 200 + tests/validation/NEON/ConvolutionLayerDirect.cpp | 219 + 
tests/validation/NEON/DepthConvert.cpp | 500 + tests/validation/NEON/FillBorder.cpp | 90 + tests/validation/NEON/Fixedpoint/Exp_QS8.cpp | 124 + tests/validation/NEON/Fixedpoint/Invsqrt_QS8.cpp | 123 + tests/validation/NEON/Fixedpoint/Log_QS8.cpp | 123 + .../validation/NEON/Fixedpoint/Reciprocal_QS8.cpp | 123 + tests/validation/NEON/FullyConnectedLayer.cpp | 221 + tests/validation/NEON/GEMM.cpp | 203 + tests/validation/NEON/IntegralImage.cpp | 145 + tests/validation/NEON/NormalizationLayer.cpp | 152 + tests/validation/NEON/PixelWiseMultiplication.cpp | 428 + tests/validation/NEON/Pooling/PoolingLayer.cpp | 139 + tests/validation/NEON/SoftmaxLayer.cpp | 196 + tests/validation/NEON/Threshold.cpp | 154 + tests/validation/Reference.cpp | 596 + tests/validation/Reference.h | 303 + tests/validation/ReferenceCPP.cpp | 282 + tests/validation/ReferenceCPP.h | 250 + tests/validation/Tensor.h | 111 + tests/validation/TensorFactory.h | 113 + tests/validation/TensorOperations.h | 1370 ++ tests/validation/TensorVisitors.h | 386 + tests/validation/UNIT/CMakeLists.txt | 37 + tests/validation/UNIT/FixedPoint.cpp | 163 + tests/validation/UNIT/TensorInfo.cpp | 91 + tests/validation/UNIT/TensorShape.cpp | 70 + tests/validation/UNIT/Utils.cpp | 95 + tests/validation/Validation.cpp | 359 + tests/validation/Validation.h | 127 + tests/validation/ValidationProgramOptions.cpp | 50 + tests/validation/ValidationProgramOptions.h | 45 + tests/validation/ValidationUserConfiguration.h | 42 + tests/validation/main.cpp | 104 + tests/validation/system_tests/CL/AlexNet.cpp | 111 + tests/validation/system_tests/CL/LeNet5.cpp | 94 + tests/validation/system_tests/NEON/AlexNet.cpp | 112 + tests/validation/system_tests/NEON/LeNet5.cpp | 94 + utils/Utils.cpp | 171 + utils/Utils.h | 325 + 942 files changed, 157280 insertions(+) create mode 100644 .clang-format create mode 100644 .clang-tidy create mode 100644 .gitmodules create mode 160000 3rdparty create mode 100644 SConscript create mode 100644 SConstruct 
create mode 100644 arm_compute/core/AccessWindowAutoPadding.h create mode 100644 arm_compute/core/AccessWindowStatic.h create mode 100644 arm_compute/core/AccessWindowTranspose.h create mode 100644 arm_compute/core/CL/CLHelpers.h create mode 100644 arm_compute/core/CL/CLKernelLibrary.h create mode 100644 arm_compute/core/CL/CLKernels.h create mode 100644 arm_compute/core/CL/CLTypes.h create mode 100644 arm_compute/core/CL/ICLArray.h create mode 100644 arm_compute/core/CL/ICLDistribution1D.h create mode 100644 arm_compute/core/CL/ICLHOG.h create mode 100644 arm_compute/core/CL/ICLKernel.h create mode 100644 arm_compute/core/CL/ICLLut.h create mode 100644 arm_compute/core/CL/ICLMultiHOG.h create mode 100644 arm_compute/core/CL/ICLMultiImage.h create mode 100644 arm_compute/core/CL/ICLSimple2DKernel.h create mode 100644 arm_compute/core/CL/ICLSimple3DKernel.h create mode 100644 arm_compute/core/CL/ICLSimpleKernel.h create mode 100644 arm_compute/core/CL/ICLTensor.h create mode 100644 arm_compute/core/CL/OpenCL.h create mode 100644 arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h create mode 100644 arm_compute/core/CL/kernels/CLAccumulateKernel.h create mode 100644 arm_compute/core/CL/kernels/CLActivationLayerKernel.h create mode 100644 arm_compute/core/CL/kernels/CLArithmeticAdditionKernel.h create mode 100644 arm_compute/core/CL/kernels/CLArithmeticSubtractionKernel.h create mode 100644 arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h create mode 100644 arm_compute/core/CL/kernels/CLBitwiseAndKernel.h create mode 100644 arm_compute/core/CL/kernels/CLBitwiseNotKernel.h create mode 100644 arm_compute/core/CL/kernels/CLBitwiseOrKernel.h create mode 100644 arm_compute/core/CL/kernels/CLBitwiseXorKernel.h create mode 100644 arm_compute/core/CL/kernels/CLBox3x3Kernel.h create mode 100644 arm_compute/core/CL/kernels/CLCannyEdgeKernel.h create mode 100644 arm_compute/core/CL/kernels/CLChannelCombineKernel.h create mode 100644 
arm_compute/core/CL/kernels/CLChannelExtractKernel.h create mode 100644 arm_compute/core/CL/kernels/CLCol2ImKernel.h create mode 100644 arm_compute/core/CL/kernels/CLColorConvertKernel.h create mode 100644 arm_compute/core/CL/kernels/CLConvolutionKernel.h create mode 100644 arm_compute/core/CL/kernels/CLDepthConcatenateKernel.h create mode 100644 arm_compute/core/CL/kernels/CLDepthConvertKernel.h create mode 100644 arm_compute/core/CL/kernels/CLDerivativeKernel.h create mode 100644 arm_compute/core/CL/kernels/CLDilateKernel.h create mode 100644 arm_compute/core/CL/kernels/CLErodeKernel.h create mode 100644 arm_compute/core/CL/kernels/CLFastCornersKernel.h create mode 100644 arm_compute/core/CL/kernels/CLFillBorderKernel.h create mode 100644 arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h create mode 100644 arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h create mode 100644 arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h create mode 100644 arm_compute/core/CL/kernels/CLGEMMMatrixAdditionKernel.h create mode 100644 arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h create mode 100644 arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h create mode 100644 arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h create mode 100644 arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h create mode 100644 arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h create mode 100644 arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h create mode 100644 arm_compute/core/CL/kernels/CLHOGDetectorKernel.h create mode 100644 arm_compute/core/CL/kernels/CLHarrisCornersKernel.h create mode 100644 arm_compute/core/CL/kernels/CLHistogramKernel.h create mode 100644 arm_compute/core/CL/kernels/CLIm2ColKernel.h create mode 100644 arm_compute/core/CL/kernels/CLIntegralImageKernel.h create mode 100644 arm_compute/core/CL/kernels/CLLKTrackerKernel.h create mode 100644 arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h create mode 100644 
arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h create mode 100644 arm_compute/core/CL/kernels/CLMeanStdDevKernel.h create mode 100644 arm_compute/core/CL/kernels/CLMedian3x3Kernel.h create mode 100644 arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h create mode 100644 arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h create mode 100644 arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h create mode 100644 arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h create mode 100644 arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h create mode 100644 arm_compute/core/CL/kernels/CLPoolingLayerKernel.h create mode 100644 arm_compute/core/CL/kernels/CLRemapKernel.h create mode 100644 arm_compute/core/CL/kernels/CLScaleKernel.h create mode 100644 arm_compute/core/CL/kernels/CLScharr3x3Kernel.h create mode 100644 arm_compute/core/CL/kernels/CLSobel3x3Kernel.h create mode 100644 arm_compute/core/CL/kernels/CLSobel5x5Kernel.h create mode 100644 arm_compute/core/CL/kernels/CLSobel7x7Kernel.h create mode 100644 arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h create mode 100644 arm_compute/core/CL/kernels/CLTableLookupKernel.h create mode 100644 arm_compute/core/CL/kernels/CLThresholdKernel.h create mode 100644 arm_compute/core/CL/kernels/CLTransposeKernel.h create mode 100644 arm_compute/core/CL/kernels/CLWarpAffineKernel.h create mode 100644 arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h create mode 100644 arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h create mode 100644 arm_compute/core/CPP/CPPKernels.h create mode 100644 arm_compute/core/CPP/ICPPKernel.h create mode 100644 arm_compute/core/CPP/ICPPSimpleKernel.h create mode 100644 arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h create mode 100644 arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h create mode 100644 arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h create mode 100644 arm_compute/core/Coordinates.h create mode 
100644 arm_compute/core/Dimensions.h create mode 100644 arm_compute/core/Error.h create mode 100644 arm_compute/core/FixedPoint.h create mode 100644 arm_compute/core/FixedPoint.inl create mode 100644 arm_compute/core/HOGInfo.h create mode 100644 arm_compute/core/Helpers.h create mode 100644 arm_compute/core/Helpers.inl create mode 100644 arm_compute/core/IAccessWindow.h create mode 100644 arm_compute/core/IArray.h create mode 100644 arm_compute/core/IDistribution.h create mode 100644 arm_compute/core/IDistribution1D.h create mode 100644 arm_compute/core/IHOG.h create mode 100644 arm_compute/core/IKernel.h create mode 100644 arm_compute/core/ILut.h create mode 100644 arm_compute/core/IMultiHOG.h create mode 100644 arm_compute/core/IMultiImage.h create mode 100644 arm_compute/core/IPyramid.h create mode 100644 arm_compute/core/ITensor.h create mode 100644 arm_compute/core/ITensorInfo.h create mode 100644 arm_compute/core/MultiImageInfo.h create mode 100644 arm_compute/core/NEON/INEKernel.h create mode 100644 arm_compute/core/NEON/INESimpleKernel.h create mode 100644 arm_compute/core/NEON/NEColorConvertHelper.inl create mode 100644 arm_compute/core/NEON/NEFixedPoint.h create mode 100644 arm_compute/core/NEON/NEFixedPoint.inl create mode 100644 arm_compute/core/NEON/NEKernels.h create mode 100644 arm_compute/core/NEON/NEMath.h create mode 100644 arm_compute/core/NEON/NEMath.inl create mode 100644 arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEAccumulateKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEActivationLayerKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h create mode 100644 
arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEBitwiseXorKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEBox3x3Kernel.h create mode 100644 arm_compute/core/NEON/kernels/NECannyEdgeKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEChannelCombineKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEChannelExtractKernel.h create mode 100644 arm_compute/core/NEON/kernels/NECol2ImKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEColorConvertKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEConvolutionKernel.h create mode 100644 arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEDepthConcatenateKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEDepthConvertKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEDerivativeKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEDilateKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEErodeKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEFastCornersKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEFillArrayKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEFillBorderKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEFillInnerBorderKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h create mode 100644 arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h create mode 100644 
arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h create mode 100644 arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h create mode 100644 arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEHistogramKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEIm2ColKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEIntegralImageKernel.h create mode 100644 arm_compute/core/NEON/kernels/NELKTrackerKernel.h create mode 100644 arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h create mode 100644 arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h create mode 100644 arm_compute/core/NEON/kernels/NENonLinearFilterKernel.h create mode 100644 arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h create mode 100644 arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h create mode 100644 arm_compute/core/NEON/kernels/NERemapKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEScaleKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEScharr3x3Kernel.h create mode 100644 arm_compute/core/NEON/kernels/NESobel3x3Kernel.h create mode 100644 arm_compute/core/NEON/kernels/NESobel5x5Kernel.h create mode 100644 arm_compute/core/NEON/kernels/NESobel7x7Kernel.h create mode 100644 
arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h create mode 100644 arm_compute/core/NEON/kernels/NETableLookupKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEThresholdKernel.h create mode 100644 arm_compute/core/NEON/kernels/NETransposeKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEWarpKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h create mode 100644 arm_compute/core/PixelValue.h create mode 100644 arm_compute/core/PyramidInfo.h create mode 100644 arm_compute/core/Size2D.h create mode 100644 arm_compute/core/Steps.h create mode 100644 arm_compute/core/Strides.h create mode 100644 arm_compute/core/SubTensorInfo.h create mode 100644 arm_compute/core/TensorInfo.h create mode 100644 arm_compute/core/TensorShape.h create mode 100644 arm_compute/core/Types.h create mode 100644 arm_compute/core/Utils.h create mode 100644 arm_compute/core/Validate.h create mode 100644 arm_compute/core/Window.h create mode 100644 arm_compute/core/Window.inl create mode 100644 arm_compute/runtime/Array.h create mode 100644 arm_compute/runtime/CL/CLArray.h create mode 100644 arm_compute/runtime/CL/CLDistribution1D.h create mode 100644 arm_compute/runtime/CL/CLFunctions.h create mode 100644 arm_compute/runtime/CL/CLHOG.h create mode 100644 arm_compute/runtime/CL/CLLut.h create mode 100644 arm_compute/runtime/CL/CLLutAllocator.h create mode 100644 arm_compute/runtime/CL/CLMultiHOG.h create mode 100644 arm_compute/runtime/CL/CLMultiImage.h create mode 100644 arm_compute/runtime/CL/CLPyramid.h create mode 100644 arm_compute/runtime/CL/CLScheduler.h create mode 100644 arm_compute/runtime/CL/CLSubTensor.h create mode 100644 arm_compute/runtime/CL/CLTensor.h create mode 100644 arm_compute/runtime/CL/CLTensorAllocator.h create mode 100644 arm_compute/runtime/CL/ICLSimpleFunction.h create mode 100644 arm_compute/runtime/CL/functions/CLAbsoluteDifference.h create mode 100644 arm_compute/runtime/CL/functions/CLAccumulate.h create mode 
100644 arm_compute/runtime/CL/functions/CLActivationLayer.h create mode 100644 arm_compute/runtime/CL/functions/CLArithmeticAddition.h create mode 100644 arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h create mode 100644 arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h create mode 100644 arm_compute/runtime/CL/functions/CLBitwiseAnd.h create mode 100644 arm_compute/runtime/CL/functions/CLBitwiseNot.h create mode 100644 arm_compute/runtime/CL/functions/CLBitwiseOr.h create mode 100644 arm_compute/runtime/CL/functions/CLBitwiseXor.h create mode 100644 arm_compute/runtime/CL/functions/CLBox3x3.h create mode 100644 arm_compute/runtime/CL/functions/CLCannyEdge.h create mode 100644 arm_compute/runtime/CL/functions/CLChannelCombine.h create mode 100644 arm_compute/runtime/CL/functions/CLChannelExtract.h create mode 100644 arm_compute/runtime/CL/functions/CLColorConvert.h create mode 100644 arm_compute/runtime/CL/functions/CLConvolution.h create mode 100644 arm_compute/runtime/CL/functions/CLConvolutionLayer.h create mode 100644 arm_compute/runtime/CL/functions/CLDepthConcatenate.h create mode 100644 arm_compute/runtime/CL/functions/CLDepthConvert.h create mode 100644 arm_compute/runtime/CL/functions/CLDerivative.h create mode 100644 arm_compute/runtime/CL/functions/CLDilate.h create mode 100644 arm_compute/runtime/CL/functions/CLEqualizeHistogram.h create mode 100644 arm_compute/runtime/CL/functions/CLErode.h create mode 100644 arm_compute/runtime/CL/functions/CLFastCorners.h create mode 100644 arm_compute/runtime/CL/functions/CLFillBorder.h create mode 100644 arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h create mode 100644 arm_compute/runtime/CL/functions/CLGEMM.h create mode 100644 arm_compute/runtime/CL/functions/CLGEMMInterleave4x4.h create mode 100644 arm_compute/runtime/CL/functions/CLGEMMLowp.h create mode 100644 arm_compute/runtime/CL/functions/CLGaussian3x3.h create mode 100644 arm_compute/runtime/CL/functions/CLGaussian5x5.h 
create mode 100644 arm_compute/runtime/CL/functions/CLGaussianPyramid.h create mode 100644 arm_compute/runtime/CL/functions/CLHOGDescriptor.h create mode 100644 arm_compute/runtime/CL/functions/CLHOGDetector.h create mode 100644 arm_compute/runtime/CL/functions/CLHOGGradient.h create mode 100644 arm_compute/runtime/CL/functions/CLHOGMultiDetection.h create mode 100644 arm_compute/runtime/CL/functions/CLHarrisCorners.h create mode 100644 arm_compute/runtime/CL/functions/CLHistogram.h create mode 100644 arm_compute/runtime/CL/functions/CLIntegralImage.h create mode 100644 arm_compute/runtime/CL/functions/CLLaplacianPyramid.h create mode 100644 arm_compute/runtime/CL/functions/CLLaplacianReconstruct.h create mode 100644 arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h create mode 100644 arm_compute/runtime/CL/functions/CLMagnitude.h create mode 100644 arm_compute/runtime/CL/functions/CLMeanStdDev.h create mode 100644 arm_compute/runtime/CL/functions/CLMedian3x3.h create mode 100644 arm_compute/runtime/CL/functions/CLMinMaxLocation.h create mode 100644 arm_compute/runtime/CL/functions/CLNonLinearFilter.h create mode 100644 arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h create mode 100644 arm_compute/runtime/CL/functions/CLNormalizationLayer.h create mode 100644 arm_compute/runtime/CL/functions/CLOpticalFlow.h create mode 100644 arm_compute/runtime/CL/functions/CLPhase.h create mode 100644 arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h create mode 100644 arm_compute/runtime/CL/functions/CLPoolingLayer.h create mode 100644 arm_compute/runtime/CL/functions/CLRemap.h create mode 100644 arm_compute/runtime/CL/functions/CLScale.h create mode 100644 arm_compute/runtime/CL/functions/CLScharr3x3.h create mode 100644 arm_compute/runtime/CL/functions/CLSobel3x3.h create mode 100644 arm_compute/runtime/CL/functions/CLSobel5x5.h create mode 100644 arm_compute/runtime/CL/functions/CLSobel7x7.h create mode 100644 
arm_compute/runtime/CL/functions/CLSoftmaxLayer.h create mode 100644 arm_compute/runtime/CL/functions/CLTableLookup.h create mode 100644 arm_compute/runtime/CL/functions/CLThreshold.h create mode 100644 arm_compute/runtime/CL/functions/CLTranspose.h create mode 100644 arm_compute/runtime/CL/functions/CLWarpAffine.h create mode 100644 arm_compute/runtime/CL/functions/CLWarpPerspective.h create mode 100644 arm_compute/runtime/CPP/CPPScheduler.h create mode 100644 arm_compute/runtime/Distribution1D.h create mode 100644 arm_compute/runtime/HOG.h create mode 100644 arm_compute/runtime/IFunction.h create mode 100644 arm_compute/runtime/ILutAllocator.h create mode 100644 arm_compute/runtime/IScheduler.h create mode 100644 arm_compute/runtime/ITensorAllocator.h create mode 100644 arm_compute/runtime/Lut.h create mode 100644 arm_compute/runtime/LutAllocator.h create mode 100644 arm_compute/runtime/MultiHOG.h create mode 100644 arm_compute/runtime/MultiImage.h create mode 100644 arm_compute/runtime/NEON/INESimpleFunction.h create mode 100644 arm_compute/runtime/NEON/NEFunctions.h create mode 100644 arm_compute/runtime/NEON/NEScheduler.h create mode 100644 arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h create mode 100644 arm_compute/runtime/NEON/functions/NEAccumulate.h create mode 100644 arm_compute/runtime/NEON/functions/NEActivationLayer.h create mode 100644 arm_compute/runtime/NEON/functions/NEArithmeticAddition.h create mode 100644 arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h create mode 100644 arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h create mode 100644 arm_compute/runtime/NEON/functions/NEBitwiseAnd.h create mode 100644 arm_compute/runtime/NEON/functions/NEBitwiseNot.h create mode 100644 arm_compute/runtime/NEON/functions/NEBitwiseOr.h create mode 100644 arm_compute/runtime/NEON/functions/NEBitwiseXor.h create mode 100644 arm_compute/runtime/NEON/functions/NEBox3x3.h create mode 100644 
arm_compute/runtime/NEON/functions/NECannyEdge.h create mode 100644 arm_compute/runtime/NEON/functions/NEChannelCombine.h create mode 100644 arm_compute/runtime/NEON/functions/NEChannelExtract.h create mode 100644 arm_compute/runtime/NEON/functions/NEColorConvert.h create mode 100644 arm_compute/runtime/NEON/functions/NEConvolution.h create mode 100644 arm_compute/runtime/NEON/functions/NEConvolutionLayer.h create mode 100644 arm_compute/runtime/NEON/functions/NEDepthConcatenate.h create mode 100644 arm_compute/runtime/NEON/functions/NEDepthConvert.h create mode 100644 arm_compute/runtime/NEON/functions/NEDerivative.h create mode 100644 arm_compute/runtime/NEON/functions/NEDilate.h create mode 100644 arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h create mode 100644 arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h create mode 100644 arm_compute/runtime/NEON/functions/NEErode.h create mode 100644 arm_compute/runtime/NEON/functions/NEFastCorners.h create mode 100644 arm_compute/runtime/NEON/functions/NEFillBorder.h create mode 100644 arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h create mode 100644 arm_compute/runtime/NEON/functions/NEGEMM.h create mode 100644 arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h create mode 100644 arm_compute/runtime/NEON/functions/NEGEMMLowp.h create mode 100644 arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h create mode 100644 arm_compute/runtime/NEON/functions/NEGaussian3x3.h create mode 100644 arm_compute/runtime/NEON/functions/NEGaussian5x5.h create mode 100644 arm_compute/runtime/NEON/functions/NEGaussianPyramid.h create mode 100644 arm_compute/runtime/NEON/functions/NEHOGDescriptor.h create mode 100644 arm_compute/runtime/NEON/functions/NEHOGDetector.h create mode 100644 arm_compute/runtime/NEON/functions/NEHOGGradient.h create mode 100644 arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h create mode 100644 arm_compute/runtime/NEON/functions/NEHarrisCorners.h create mode 
100644 arm_compute/runtime/NEON/functions/NEHistogram.h create mode 100644 arm_compute/runtime/NEON/functions/NEIntegralImage.h create mode 100644 arm_compute/runtime/NEON/functions/NELaplacianPyramid.h create mode 100644 arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h create mode 100644 arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h create mode 100644 arm_compute/runtime/NEON/functions/NEMagnitude.h create mode 100644 arm_compute/runtime/NEON/functions/NEMeanStdDev.h create mode 100644 arm_compute/runtime/NEON/functions/NEMedian3x3.h create mode 100644 arm_compute/runtime/NEON/functions/NEMinMaxLocation.h create mode 100644 arm_compute/runtime/NEON/functions/NENonLinearFilter.h create mode 100644 arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h create mode 100644 arm_compute/runtime/NEON/functions/NENormalizationLayer.h create mode 100644 arm_compute/runtime/NEON/functions/NEOpticalFlow.h create mode 100644 arm_compute/runtime/NEON/functions/NEPhase.h create mode 100644 arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h create mode 100644 arm_compute/runtime/NEON/functions/NEPoolingLayer.h create mode 100644 arm_compute/runtime/NEON/functions/NERemap.h create mode 100644 arm_compute/runtime/NEON/functions/NEScale.h create mode 100644 arm_compute/runtime/NEON/functions/NEScharr3x3.h create mode 100644 arm_compute/runtime/NEON/functions/NESobel3x3.h create mode 100644 arm_compute/runtime/NEON/functions/NESobel5x5.h create mode 100644 arm_compute/runtime/NEON/functions/NESobel7x7.h create mode 100644 arm_compute/runtime/NEON/functions/NESoftmaxLayer.h create mode 100644 arm_compute/runtime/NEON/functions/NETableLookup.h create mode 100644 arm_compute/runtime/NEON/functions/NEThreshold.h create mode 100644 arm_compute/runtime/NEON/functions/NETranspose.h create mode 100644 arm_compute/runtime/NEON/functions/NEWarpAffine.h create mode 100644 arm_compute/runtime/NEON/functions/NEWarpPerspective.h create mode 100644 
arm_compute/runtime/OMP/OMPScheduler.h create mode 100644 arm_compute/runtime/Pyramid.h create mode 100644 arm_compute/runtime/Scheduler.h create mode 100644 arm_compute/runtime/SingleThreadScheduler.h create mode 100644 arm_compute/runtime/SubTensor.h create mode 100644 arm_compute/runtime/Tensor.h create mode 100644 arm_compute/runtime/TensorAllocator.h create mode 100644 arm_compute/runtime/Utils.h create mode 160000 data create mode 100644 docs/00_introduction.dox create mode 100644 docs/01_library.dox create mode 100644 docs/02_tests.dox create mode 100644 docs/Doxyfile create mode 100644 docs/header.html create mode 100644 examples/SConscript create mode 100644 examples/cl_convolution.cpp create mode 100644 examples/cl_events.cpp create mode 100644 examples/neon_cnn.cpp create mode 100644 examples/neon_convolution.cpp create mode 100644 examples/neon_copy_objects.cpp create mode 100644 examples/neon_scale.cpp create mode 100644 examples/neoncl_scale_median_gaussian.cpp create mode 100644 include/CL/cl.h create mode 100644 include/CL/cl.hpp create mode 100644 include/CL/cl2.hpp create mode 100644 include/CL/cl_d3d10.h create mode 100644 include/CL/cl_d3d11.h create mode 100644 include/CL/cl_dx9_media_sharing.h create mode 100644 include/CL/cl_egl.h create mode 100644 include/CL/cl_ext.h create mode 100644 include/CL/cl_gl.h create mode 100644 include/CL/cl_gl_ext.h create mode 100644 include/CL/cl_platform.h create mode 100644 include/CL/opencl.h create mode 100644 opencl-1.2-stubs/SConscript create mode 100755 opencl-1.2-stubs/opencl_stubs.c create mode 100755 scripts/add_copyright.py create mode 100755 scripts/check_bad_style.sh create mode 100755 scripts/check_clang-tidy.py create mode 100644 scripts/clang-tidy.h create mode 100755 scripts/clang-tidy.sh create mode 100644 scripts/copyright_eula.txt create mode 100644 scripts/copyright_mit.txt create mode 100755 scripts/fix_code_formatting.sh create mode 100755 scripts/format_doxygen.py create mode 100755 
scripts/include_functions_kernels.py create mode 100644 src/core/AccessWindowAutoPadding.cpp create mode 100644 src/core/AccessWindowStatic.cpp create mode 100644 src/core/AccessWindowTranspose.cpp create mode 100644 src/core/CL/CLHelpers.cpp create mode 100644 src/core/CL/CLKernelLibrary.cpp create mode 100644 src/core/CL/ICLDistribution1D.cpp create mode 100644 src/core/CL/ICLHOG.cpp create mode 100644 src/core/CL/ICLKernel.cpp create mode 100644 src/core/CL/ICLLut.cpp create mode 100644 src/core/CL/ICLMultiHOG.cpp create mode 100644 src/core/CL/ICLMultiImage.cpp create mode 100644 src/core/CL/ICLSimple2DKernel.cpp create mode 100644 src/core/CL/ICLSimple3DKernel.cpp create mode 100644 src/core/CL/ICLSimpleKernel.cpp create mode 100644 src/core/CL/ICLTensor.cpp create mode 100644 src/core/CL/OpenCL.cpp create mode 100644 src/core/CL/cl_kernels/absdiff.cl create mode 100644 src/core/CL/cl_kernels/accumulate.cl create mode 100644 src/core/CL/cl_kernels/activation_layer.cl create mode 100644 src/core/CL/cl_kernels/arithmetic_op.cl create mode 100644 src/core/CL/cl_kernels/batchnormalization_layer.cl create mode 100644 src/core/CL/cl_kernels/bitwise_op.cl create mode 100644 src/core/CL/cl_kernels/canny.cl create mode 100644 src/core/CL/cl_kernels/channel_combine.cl create mode 100644 src/core/CL/cl_kernels/channel_extract.cl create mode 100644 src/core/CL/cl_kernels/color_convert.cl create mode 100644 src/core/CL/cl_kernels/concatenate.cl create mode 100644 src/core/CL/cl_kernels/convolution3x3.cl create mode 100644 src/core/CL/cl_kernels/convolution5x5.cl create mode 100644 src/core/CL/cl_kernels/convolution7x7.cl create mode 100644 src/core/CL/cl_kernels/convolution9x9.cl create mode 100644 src/core/CL/cl_kernels/convolution_layer.cl create mode 100644 src/core/CL/cl_kernels/convolution_rectangle.cl create mode 100644 src/core/CL/cl_kernels/depth_convert.cl create mode 100644 src/core/CL/cl_kernels/derivative.cl create mode 100644 src/core/CL/cl_kernels/dilate.cl 
create mode 100644 src/core/CL/cl_kernels/erode.cl create mode 100644 src/core/CL/cl_kernels/fast_corners.cl create mode 100644 src/core/CL/cl_kernels/fill_border.cl create mode 100644 src/core/CL/cl_kernels/gaussian_pyramid.cl create mode 100644 src/core/CL/cl_kernels/gemm.cl create mode 100644 src/core/CL/cl_kernels/harris_corners.cl create mode 100644 src/core/CL/cl_kernels/helpers.h create mode 100644 src/core/CL/cl_kernels/histogram.cl create mode 100644 src/core/CL/cl_kernels/hog.cl create mode 100644 src/core/CL/cl_kernels/integral_image.cl create mode 100644 src/core/CL/cl_kernels/magnitude_phase.cl create mode 100644 src/core/CL/cl_kernels/mean_stddev.cl create mode 100644 src/core/CL/cl_kernels/minmaxloc.cl create mode 100644 src/core/CL/cl_kernels/non_linear_filter3x3.cl create mode 100644 src/core/CL/cl_kernels/non_linear_filter5x5.cl create mode 100644 src/core/CL/cl_kernels/non_linear_filter_helpers.h create mode 100644 src/core/CL/cl_kernels/nonmax.cl create mode 100644 src/core/CL/cl_kernels/normalization_layer.cl create mode 100644 src/core/CL/cl_kernels/optical_flow_pyramid_lk.cl create mode 100644 src/core/CL/cl_kernels/pixelwise_mul_float.cl create mode 100644 src/core/CL/cl_kernels/pixelwise_mul_int.cl create mode 100644 src/core/CL/cl_kernels/pooling_layer.cl create mode 100644 src/core/CL/cl_kernels/remap.cl create mode 100644 src/core/CL/cl_kernels/scale.cl create mode 100644 src/core/CL/cl_kernels/scharr_filter.cl create mode 100644 src/core/CL/cl_kernels/sobel_filter.cl create mode 100644 src/core/CL/cl_kernels/softmax_layer.cl create mode 100644 src/core/CL/cl_kernels/tablelookup.cl create mode 100644 src/core/CL/cl_kernels/threshold.cl create mode 100644 src/core/CL/cl_kernels/transpose.cl create mode 100644 src/core/CL/cl_kernels/types.h create mode 100644 src/core/CL/cl_kernels/warp_affine.cl create mode 100644 src/core/CL/cl_kernels/warp_helpers.h create mode 100644 src/core/CL/cl_kernels/warp_perspective.cl create mode 100644 
src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp create mode 100644 src/core/CL/kernels/CLAccumulateKernel.cpp create mode 100644 src/core/CL/kernels/CLActivationLayerKernel.cpp create mode 100644 src/core/CL/kernels/CLArithmeticAdditionKernel.cpp create mode 100644 src/core/CL/kernels/CLArithmeticSubtractionKernel.cpp create mode 100644 src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp create mode 100644 src/core/CL/kernels/CLBitwiseAndKernel.cpp create mode 100644 src/core/CL/kernels/CLBitwiseNotKernel.cpp create mode 100644 src/core/CL/kernels/CLBitwiseOrKernel.cpp create mode 100644 src/core/CL/kernels/CLBitwiseXorKernel.cpp create mode 100644 src/core/CL/kernels/CLBox3x3Kernel.cpp create mode 100644 src/core/CL/kernels/CLCannyEdgeKernel.cpp create mode 100644 src/core/CL/kernels/CLChannelCombineKernel.cpp create mode 100644 src/core/CL/kernels/CLChannelExtractKernel.cpp create mode 100644 src/core/CL/kernels/CLCol2ImKernel.cpp create mode 100644 src/core/CL/kernels/CLColorConvertKernel.cpp create mode 100644 src/core/CL/kernels/CLConvolutionKernel.cpp create mode 100644 src/core/CL/kernels/CLDepthConcatenateKernel.cpp create mode 100644 src/core/CL/kernels/CLDepthConvertKernel.cpp create mode 100644 src/core/CL/kernels/CLDerivativeKernel.cpp create mode 100644 src/core/CL/kernels/CLDilateKernel.cpp create mode 100644 src/core/CL/kernels/CLErodeKernel.cpp create mode 100644 src/core/CL/kernels/CLFastCornersKernel.cpp create mode 100644 src/core/CL/kernels/CLFillBorderKernel.cpp create mode 100644 src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp create mode 100644 src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp create mode 100644 src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp create mode 100644 src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp create mode 100644 src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp create mode 100644 src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp create mode 100644 
src/core/CL/kernels/CLGaussian3x3Kernel.cpp create mode 100644 src/core/CL/kernels/CLGaussian5x5Kernel.cpp create mode 100644 src/core/CL/kernels/CLGaussianPyramidKernel.cpp create mode 100644 src/core/CL/kernels/CLHOGDescriptorKernel.cpp create mode 100644 src/core/CL/kernels/CLHOGDetectorKernel.cpp create mode 100644 src/core/CL/kernels/CLHarrisCornersKernel.cpp create mode 100644 src/core/CL/kernels/CLHistogramKernel.cpp create mode 100644 src/core/CL/kernels/CLIm2ColKernel.cpp create mode 100644 src/core/CL/kernels/CLIntegralImageKernel.cpp create mode 100644 src/core/CL/kernels/CLLKTrackerKernel.cpp create mode 100644 src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp create mode 100644 src/core/CL/kernels/CLMagnitudePhaseKernel.cpp create mode 100644 src/core/CL/kernels/CLMeanStdDevKernel.cpp create mode 100644 src/core/CL/kernels/CLMedian3x3Kernel.cpp create mode 100644 src/core/CL/kernels/CLMinMaxLocationKernel.cpp create mode 100644 src/core/CL/kernels/CLNonLinearFilterKernel.cpp create mode 100644 src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp create mode 100644 src/core/CL/kernels/CLNormalizationLayerKernel.cpp create mode 100644 src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp create mode 100644 src/core/CL/kernels/CLPoolingLayerKernel.cpp create mode 100644 src/core/CL/kernels/CLRemapKernel.cpp create mode 100644 src/core/CL/kernels/CLScaleKernel.cpp create mode 100644 src/core/CL/kernels/CLScharr3x3Kernel.cpp create mode 100644 src/core/CL/kernels/CLSobel3x3Kernel.cpp create mode 100644 src/core/CL/kernels/CLSobel5x5Kernel.cpp create mode 100644 src/core/CL/kernels/CLSobel7x7Kernel.cpp create mode 100644 src/core/CL/kernels/CLSoftmaxLayerKernel.cpp create mode 100644 src/core/CL/kernels/CLTableLookupKernel.cpp create mode 100644 src/core/CL/kernels/CLThresholdKernel.cpp create mode 100644 src/core/CL/kernels/CLTransposeKernel.cpp create mode 100644 src/core/CL/kernels/CLWarpAffineKernel.cpp create mode 100644 
src/core/CL/kernels/CLWarpPerspectiveKernel.cpp create mode 100644 src/core/CL/kernels/CLWeightsReshapeKernel.cpp create mode 100644 src/core/CPP/ICPPSimpleKernel.cpp create mode 100644 src/core/CPP/kernels/CPPCornerCandidatesKernel.cpp create mode 100644 src/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.cpp create mode 100644 src/core/CPP/kernels/CPPSortEuclideanDistanceKernel.cpp create mode 100644 src/core/Error.cpp create mode 100644 src/core/HOGInfo.cpp create mode 100644 src/core/Helpers.cpp create mode 100644 src/core/IAccessWindow.cpp create mode 100644 src/core/IDistribution.cpp create mode 100644 src/core/IDistribution1D.cpp create mode 100644 src/core/IKernel.cpp create mode 100644 src/core/ITensor.cpp create mode 100644 src/core/MultiImageInfo.cpp create mode 100644 src/core/NEON/kernels/NEAbsoluteDifferenceKernel.cpp create mode 100644 src/core/NEON/kernels/NEAccumulateKernel.cpp create mode 100644 src/core/NEON/kernels/NEActivationLayerKernel.cpp create mode 100644 src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp create mode 100644 src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp create mode 100644 src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp create mode 100644 src/core/NEON/kernels/NEBitwiseAndKernel.cpp create mode 100644 src/core/NEON/kernels/NEBitwiseNotKernel.cpp create mode 100644 src/core/NEON/kernels/NEBitwiseOrKernel.cpp create mode 100644 src/core/NEON/kernels/NEBitwiseXorKernel.cpp create mode 100644 src/core/NEON/kernels/NEBox3x3Kernel.cpp create mode 100644 src/core/NEON/kernels/NECannyEdgeKernel.cpp create mode 100644 src/core/NEON/kernels/NEChannelCombineKernel.cpp create mode 100644 src/core/NEON/kernels/NEChannelExtractKernel.cpp create mode 100644 src/core/NEON/kernels/NECol2ImKernel.cpp create mode 100644 src/core/NEON/kernels/NEColorConvertKernel.cpp create mode 100644 src/core/NEON/kernels/NEConvolutionKernel.cpp create mode 100644 src/core/NEON/kernels/NECumulativeDistributionKernel.cpp 
create mode 100644 src/core/NEON/kernels/NEDepthConcatenateKernel.cpp create mode 100644 src/core/NEON/kernels/NEDepthConvertKernel.cpp create mode 100644 src/core/NEON/kernels/NEDerivativeKernel.cpp create mode 100644 src/core/NEON/kernels/NEDilateKernel.cpp create mode 100644 src/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.cpp create mode 100644 src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp create mode 100644 src/core/NEON/kernels/NEErodeKernel.cpp create mode 100644 src/core/NEON/kernels/NEFastCornersKernel.cpp create mode 100644 src/core/NEON/kernels/NEFillArrayKernel.cpp create mode 100644 src/core/NEON/kernels/NEFillBorderKernel.cpp create mode 100644 src/core/NEON/kernels/NEFillInnerBorderKernel.cpp create mode 100644 src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp create mode 100644 src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp create mode 100644 src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp create mode 100644 src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp create mode 100644 src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp create mode 100644 src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp create mode 100644 src/core/NEON/kernels/NEGaussian3x3Kernel.cpp create mode 100644 src/core/NEON/kernels/NEGaussian5x5Kernel.cpp create mode 100644 src/core/NEON/kernels/NEGaussianPyramidKernel.cpp create mode 100644 src/core/NEON/kernels/NEHOGDescriptorKernel.cpp create mode 100644 src/core/NEON/kernels/NEHOGDetectorKernel.cpp create mode 100644 src/core/NEON/kernels/NEHarrisCornersKernel.cpp create mode 100644 src/core/NEON/kernels/NEHistogramKernel.cpp create mode 100644 src/core/NEON/kernels/NEIm2ColKernel.cpp create mode 100644 src/core/NEON/kernels/NEIntegralImageKernel.cpp create mode 100644 src/core/NEON/kernels/NELKTrackerKernel.cpp create mode 100644 src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp create mode 100644 src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp create 
mode 100644 src/core/NEON/kernels/NEMeanStdDevKernel.cpp create mode 100644 src/core/NEON/kernels/NEMedian3x3Kernel.cpp create mode 100644 src/core/NEON/kernels/NEMinMaxLocationKernel.cpp create mode 100644 src/core/NEON/kernels/NENonLinearFilterKernel.cpp create mode 100644 src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp create mode 100644 src/core/NEON/kernels/NENormalizationLayerKernel.cpp create mode 100644 src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp create mode 100644 src/core/NEON/kernels/NEPoolingLayerKernel.cpp create mode 100644 src/core/NEON/kernels/NERemapKernel.cpp create mode 100644 src/core/NEON/kernels/NEScaleKernel.cpp create mode 100644 src/core/NEON/kernels/NEScharr3x3Kernel.cpp create mode 100644 src/core/NEON/kernels/NESobel3x3Kernel.cpp create mode 100644 src/core/NEON/kernels/NESobel5x5Kernel.cpp create mode 100644 src/core/NEON/kernels/NESobel7x7Kernel.cpp create mode 100644 src/core/NEON/kernels/NESoftmaxLayerKernel.cpp create mode 100644 src/core/NEON/kernels/NETableLookupKernel.cpp create mode 100644 src/core/NEON/kernels/NEThresholdKernel.cpp create mode 100644 src/core/NEON/kernels/NETransposeKernel.cpp create mode 100644 src/core/NEON/kernels/NEWarpKernel.cpp create mode 100644 src/core/NEON/kernels/NEWeightsReshapeKernel.cpp create mode 100644 src/core/PyramidInfo.cpp create mode 100644 src/core/SubTensorInfo.cpp create mode 100644 src/core/TensorInfo.cpp create mode 100644 src/core/Utils.cpp create mode 100644 src/core/Validate.cpp create mode 100644 src/runtime/CL/CLDistribution1D.cpp create mode 100644 src/runtime/CL/CLHOG.cpp create mode 100644 src/runtime/CL/CLLut.cpp create mode 100644 src/runtime/CL/CLLutAllocator.cpp create mode 100644 src/runtime/CL/CLMultiHOG.cpp create mode 100644 src/runtime/CL/CLMultiImage.cpp create mode 100644 src/runtime/CL/CLPyramid.cpp create mode 100644 src/runtime/CL/CLScheduler.cpp create mode 100644 src/runtime/CL/CLSubTensor.cpp create mode 100644 
src/runtime/CL/CLTensor.cpp create mode 100644 src/runtime/CL/CLTensorAllocator.cpp create mode 100644 src/runtime/CL/ICLSimpleFunction.cpp create mode 100644 src/runtime/CL/functions/CLAbsoluteDifference.cpp create mode 100644 src/runtime/CL/functions/CLAccumulate.cpp create mode 100644 src/runtime/CL/functions/CLActivationLayer.cpp create mode 100644 src/runtime/CL/functions/CLArithmeticAddition.cpp create mode 100644 src/runtime/CL/functions/CLArithmeticSubtraction.cpp create mode 100644 src/runtime/CL/functions/CLBatchNormalizationLayer.cpp create mode 100644 src/runtime/CL/functions/CLBitwiseAnd.cpp create mode 100644 src/runtime/CL/functions/CLBitwiseNot.cpp create mode 100644 src/runtime/CL/functions/CLBitwiseOr.cpp create mode 100644 src/runtime/CL/functions/CLBitwiseXor.cpp create mode 100644 src/runtime/CL/functions/CLBox3x3.cpp create mode 100644 src/runtime/CL/functions/CLCannyEdge.cpp create mode 100644 src/runtime/CL/functions/CLChannelCombine.cpp create mode 100644 src/runtime/CL/functions/CLChannelExtract.cpp create mode 100644 src/runtime/CL/functions/CLColorConvert.cpp create mode 100644 src/runtime/CL/functions/CLConvolution.cpp create mode 100644 src/runtime/CL/functions/CLConvolutionLayer.cpp create mode 100644 src/runtime/CL/functions/CLDepthConcatenate.cpp create mode 100644 src/runtime/CL/functions/CLDepthConvert.cpp create mode 100644 src/runtime/CL/functions/CLDerivative.cpp create mode 100644 src/runtime/CL/functions/CLDilate.cpp create mode 100644 src/runtime/CL/functions/CLEqualizeHistogram.cpp create mode 100644 src/runtime/CL/functions/CLErode.cpp create mode 100644 src/runtime/CL/functions/CLFastCorners.cpp create mode 100644 src/runtime/CL/functions/CLFillBorder.cpp create mode 100644 src/runtime/CL/functions/CLFullyConnectedLayer.cpp create mode 100644 src/runtime/CL/functions/CLGEMM.cpp create mode 100644 src/runtime/CL/functions/CLGEMMInterleave4x4.cpp create mode 100644 src/runtime/CL/functions/CLGEMMLowp.cpp create mode 100644 
src/runtime/CL/functions/CLGaussian3x3.cpp create mode 100644 src/runtime/CL/functions/CLGaussian5x5.cpp create mode 100644 src/runtime/CL/functions/CLGaussianPyramid.cpp create mode 100644 src/runtime/CL/functions/CLHOGDescriptor.cpp create mode 100644 src/runtime/CL/functions/CLHOGDetector.cpp create mode 100644 src/runtime/CL/functions/CLHOGGradient.cpp create mode 100644 src/runtime/CL/functions/CLHOGMultiDetection.cpp create mode 100644 src/runtime/CL/functions/CLHarrisCorners.cpp create mode 100644 src/runtime/CL/functions/CLHistogram.cpp create mode 100644 src/runtime/CL/functions/CLIntegralImage.cpp create mode 100644 src/runtime/CL/functions/CLLaplacianPyramid.cpp create mode 100644 src/runtime/CL/functions/CLLaplacianReconstruct.cpp create mode 100644 src/runtime/CL/functions/CLLocallyConnectedLayer.cpp create mode 100644 src/runtime/CL/functions/CLMagnitude.cpp create mode 100644 src/runtime/CL/functions/CLMeanStdDev.cpp create mode 100644 src/runtime/CL/functions/CLMedian3x3.cpp create mode 100644 src/runtime/CL/functions/CLMinMaxLocation.cpp create mode 100644 src/runtime/CL/functions/CLNonLinearFilter.cpp create mode 100644 src/runtime/CL/functions/CLNonMaximaSuppression3x3.cpp create mode 100644 src/runtime/CL/functions/CLNormalizationLayer.cpp create mode 100644 src/runtime/CL/functions/CLOpticalFlow.cpp create mode 100644 src/runtime/CL/functions/CLPhase.cpp create mode 100644 src/runtime/CL/functions/CLPixelWiseMultiplication.cpp create mode 100644 src/runtime/CL/functions/CLPoolingLayer.cpp create mode 100644 src/runtime/CL/functions/CLRemap.cpp create mode 100644 src/runtime/CL/functions/CLScale.cpp create mode 100644 src/runtime/CL/functions/CLScharr3x3.cpp create mode 100644 src/runtime/CL/functions/CLSobel3x3.cpp create mode 100644 src/runtime/CL/functions/CLSobel5x5.cpp create mode 100644 src/runtime/CL/functions/CLSobel7x7.cpp create mode 100644 src/runtime/CL/functions/CLSoftmaxLayer.cpp create mode 100644 
src/runtime/CL/functions/CLTableLookup.cpp create mode 100644 src/runtime/CL/functions/CLThreshold.cpp create mode 100644 src/runtime/CL/functions/CLTranspose.cpp create mode 100644 src/runtime/CL/functions/CLWarpAffine.cpp create mode 100644 src/runtime/CL/functions/CLWarpPerspective.cpp create mode 100644 src/runtime/CPP/CPPScheduler.cpp create mode 100644 src/runtime/CPP/SingleThreadScheduler.cpp create mode 100644 src/runtime/Distribution1D.cpp create mode 100644 src/runtime/HOG.cpp create mode 100644 src/runtime/ILutAllocator.cpp create mode 100644 src/runtime/ITensorAllocator.cpp create mode 100644 src/runtime/Lut.cpp create mode 100644 src/runtime/LutAllocator.cpp create mode 100644 src/runtime/MultiHOG.cpp create mode 100644 src/runtime/MultiImage.cpp create mode 100644 src/runtime/NEON/INESimpleFunction.cpp create mode 100644 src/runtime/NEON/functions/NEAbsoluteDifference.cpp create mode 100644 src/runtime/NEON/functions/NEAccumulate.cpp create mode 100644 src/runtime/NEON/functions/NEActivationLayer.cpp create mode 100644 src/runtime/NEON/functions/NEArithmeticAddition.cpp create mode 100644 src/runtime/NEON/functions/NEArithmeticSubtraction.cpp create mode 100644 src/runtime/NEON/functions/NEBatchNormalizationLayer.cpp create mode 100644 src/runtime/NEON/functions/NEBitwiseAnd.cpp create mode 100644 src/runtime/NEON/functions/NEBitwiseNot.cpp create mode 100644 src/runtime/NEON/functions/NEBitwiseOr.cpp create mode 100644 src/runtime/NEON/functions/NEBitwiseXor.cpp create mode 100644 src/runtime/NEON/functions/NEBox3x3.cpp create mode 100644 src/runtime/NEON/functions/NECannyEdge.cpp create mode 100644 src/runtime/NEON/functions/NEChannelCombine.cpp create mode 100644 src/runtime/NEON/functions/NEChannelExtract.cpp create mode 100644 src/runtime/NEON/functions/NEColorConvert.cpp create mode 100644 src/runtime/NEON/functions/NEConvolution.cpp create mode 100644 src/runtime/NEON/functions/NEConvolutionLayer.cpp create mode 100644 
src/runtime/NEON/functions/NEDepthConcatenate.cpp create mode 100644 src/runtime/NEON/functions/NEDepthConvert.cpp create mode 100644 src/runtime/NEON/functions/NEDerivative.cpp create mode 100644 src/runtime/NEON/functions/NEDilate.cpp create mode 100644 src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp create mode 100644 src/runtime/NEON/functions/NEEqualizeHistogram.cpp create mode 100644 src/runtime/NEON/functions/NEErode.cpp create mode 100644 src/runtime/NEON/functions/NEFastCorners.cpp create mode 100644 src/runtime/NEON/functions/NEFillBorder.cpp create mode 100644 src/runtime/NEON/functions/NEFullyConnectedLayer.cpp create mode 100644 src/runtime/NEON/functions/NEGEMM.cpp create mode 100644 src/runtime/NEON/functions/NEGEMMInterleave4x4.cpp create mode 100644 src/runtime/NEON/functions/NEGEMMLowp.cpp create mode 100644 src/runtime/NEON/functions/NEGEMMTranspose1xW.cpp create mode 100644 src/runtime/NEON/functions/NEGaussian3x3.cpp create mode 100644 src/runtime/NEON/functions/NEGaussian5x5.cpp create mode 100644 src/runtime/NEON/functions/NEGaussianPyramid.cpp create mode 100644 src/runtime/NEON/functions/NEHOGDescriptor.cpp create mode 100644 src/runtime/NEON/functions/NEHOGDetector.cpp create mode 100644 src/runtime/NEON/functions/NEHOGGradient.cpp create mode 100644 src/runtime/NEON/functions/NEHOGMultiDetection.cpp create mode 100644 src/runtime/NEON/functions/NEHarrisCorners.cpp create mode 100644 src/runtime/NEON/functions/NEHistogram.cpp create mode 100644 src/runtime/NEON/functions/NEIntegralImage.cpp create mode 100644 src/runtime/NEON/functions/NELaplacianPyramid.cpp create mode 100644 src/runtime/NEON/functions/NELaplacianReconstruct.cpp create mode 100644 src/runtime/NEON/functions/NELocallyConnectedLayer.cpp create mode 100644 src/runtime/NEON/functions/NEMagnitude.cpp create mode 100644 src/runtime/NEON/functions/NEMeanStdDev.cpp create mode 100644 src/runtime/NEON/functions/NEMedian3x3.cpp create mode 100644 
src/runtime/NEON/functions/NEMinMaxLocation.cpp create mode 100644 src/runtime/NEON/functions/NENonLinearFilter.cpp create mode 100644 src/runtime/NEON/functions/NENonMaximaSuppression3x3.cpp create mode 100644 src/runtime/NEON/functions/NENormalizationLayer.cpp create mode 100644 src/runtime/NEON/functions/NEOpticalFlow.cpp create mode 100644 src/runtime/NEON/functions/NEPhase.cpp create mode 100644 src/runtime/NEON/functions/NEPixelWiseMultiplication.cpp create mode 100644 src/runtime/NEON/functions/NEPoolingLayer.cpp create mode 100644 src/runtime/NEON/functions/NERemap.cpp create mode 100644 src/runtime/NEON/functions/NEScale.cpp create mode 100644 src/runtime/NEON/functions/NEScharr3x3.cpp create mode 100644 src/runtime/NEON/functions/NESobel3x3.cpp create mode 100644 src/runtime/NEON/functions/NESobel5x5.cpp create mode 100644 src/runtime/NEON/functions/NESobel7x7.cpp create mode 100644 src/runtime/NEON/functions/NESoftmaxLayer.cpp create mode 100644 src/runtime/NEON/functions/NETableLookup.cpp create mode 100644 src/runtime/NEON/functions/NEThreshold.cpp create mode 100644 src/runtime/NEON/functions/NETranspose.cpp create mode 100644 src/runtime/NEON/functions/NEWarpAffine.cpp create mode 100644 src/runtime/NEON/functions/NEWarpPerspective.cpp create mode 100644 src/runtime/OMP/OMPScheduler.cpp create mode 100644 src/runtime/Pyramid.cpp create mode 100644 src/runtime/Scheduler.cpp create mode 100644 src/runtime/SubTensor.cpp create mode 100644 src/runtime/Tensor.cpp create mode 100644 src/runtime/TensorAllocator.cpp create mode 100644 src/runtime/Utils.cpp create mode 100644 tests/CL/CLAccessor.h create mode 100644 tests/CL/Helper.h create mode 100644 tests/CMakeLists.txt create mode 100644 tests/Globals.h create mode 100644 tests/IAccessor.h create mode 100644 tests/NEON/Helper.h create mode 100644 tests/NEON/NEAccessor.h create mode 100644 tests/ProgramOptions.cpp create mode 100644 tests/ProgramOptions.h create mode 100644 tests/RawTensor.cpp create mode 
100644 tests/RawTensor.h create mode 100644 tests/SConscript create mode 100644 tests/TensorCache.h create mode 100644 tests/TensorLibrary.cpp create mode 100644 tests/TensorLibrary.h create mode 100644 tests/TypePrinter.h create mode 100644 tests/TypeReader.h create mode 100644 tests/Types.h create mode 100644 tests/UserConfiguration.cpp create mode 100644 tests/UserConfiguration.h create mode 100644 tests/Utils.h create mode 100644 tests/benchmark/CL/ActivationLayer.cpp create mode 100644 tests/benchmark/CL/BitwiseAnd.cpp create mode 100644 tests/benchmark/CL/CMakeLists.txt create mode 100644 tests/benchmark/CL/ConvolutionLayer.cpp create mode 100644 tests/benchmark/CL/FullyConnectedLayer.cpp create mode 100644 tests/benchmark/CL/GEMM.cpp create mode 100644 tests/benchmark/CL/GEMM.h create mode 100644 tests/benchmark/CL/NormalizationLayer.cpp create mode 100644 tests/benchmark/CL/PoolingLayer.cpp create mode 100644 tests/benchmark/CMakeLists.txt create mode 100644 tests/benchmark/Datasets.h create mode 100644 tests/benchmark/Instrument.h create mode 100644 tests/benchmark/NEON/ActivationLayer.cpp create mode 100644 tests/benchmark/NEON/BitwiseAnd.cpp create mode 100644 tests/benchmark/NEON/CMakeLists.txt create mode 100644 tests/benchmark/NEON/ConvolutionLayer.cpp create mode 100644 tests/benchmark/NEON/ConvolutionLayerDirect.cpp create mode 100644 tests/benchmark/NEON/FullyConnectedLayer.cpp create mode 100644 tests/benchmark/NEON/GEMM.cpp create mode 100644 tests/benchmark/NEON/GEMM.h create mode 100644 tests/benchmark/NEON/NormalizationLayer.cpp create mode 100644 tests/benchmark/NEON/PoolingLayer.cpp create mode 100644 tests/benchmark/PMUCounter.cpp create mode 100644 tests/benchmark/PMUCounter.h create mode 100644 tests/benchmark/PerformanceProgramOptions.cpp create mode 100644 tests/benchmark/PerformanceProgramOptions.h create mode 100644 tests/benchmark/PerformanceUserConfiguration.cpp create mode 100644 tests/benchmark/PerformanceUserConfiguration.h 
create mode 100644 tests/benchmark/Profiler.cpp create mode 100644 tests/benchmark/Profiler.h create mode 100644 tests/benchmark/WallClockTimer.cpp create mode 100644 tests/benchmark/WallClockTimer.h create mode 100644 tests/benchmark/common/ActivationLayer.h create mode 100644 tests/benchmark/common/ConvolutionLayer.h create mode 100644 tests/benchmark/common/FullyConnectedLayer.h create mode 100644 tests/benchmark/common/NormalizationLayer.h create mode 100644 tests/benchmark/common/PoolingLayer.h create mode 100644 tests/benchmark/main.cpp create mode 100644 tests/benchmark/system_tests/CL/AlexNet.cpp create mode 100644 tests/benchmark/system_tests/CL/LeNet5.cpp create mode 100644 tests/benchmark/system_tests/NEON/AlexNet.cpp create mode 100644 tests/benchmark/system_tests/NEON/LeNet5.cpp create mode 100644 tests/benchmark/system_tests/common/AlexNet.h create mode 100644 tests/benchmark/system_tests/common/LeNet5.h create mode 100644 tests/boost_wrapper.h create mode 100644 tests/dataset/ActivationFunctionDataset.h create mode 100644 tests/dataset/ActivationLayerDataset.h create mode 100644 tests/dataset/BatchNormalizationLayerDataset.h create mode 100644 tests/dataset/BorderModeDataset.h create mode 100644 tests/dataset/ConvertPolicyDataset.h create mode 100644 tests/dataset/ConvolutionLayerDataset.h create mode 100644 tests/dataset/DataTypeDatasets.h create mode 100644 tests/dataset/FullyConnectedLayerDataset.h create mode 100644 tests/dataset/GEMMDataset.h create mode 100644 tests/dataset/GenericDataset.h create mode 100644 tests/dataset/ImageDatasets.h create mode 100644 tests/dataset/InterpolationPolicyDataset.h create mode 100644 tests/dataset/NormalizationLayerDataset.h create mode 100644 tests/dataset/NormalizationTypeDataset.h create mode 100644 tests/dataset/PoolingLayerDataset.h create mode 100644 tests/dataset/RoundingPolicyDataset.h create mode 100644 tests/dataset/ShapeDatasets.h create mode 100644 tests/dataset/ThresholdDataset.h create mode 
100644 tests/model_objects/AlexNet.h create mode 100644 tests/model_objects/LeNet5.h create mode 100644 tests/validation/CL/BitwiseAnd.cpp create mode 100644 tests/validation/CL/CLFixture.cpp create mode 100644 tests/validation/CL/CLFixture.h create mode 100644 tests/validation/CL/CMakeLists.txt create mode 100644 tests/validation/CL/DepthConvert.cpp create mode 100644 tests/validation/CL/FillBorder.cpp create mode 100644 tests/validation/CL/Threshold.cpp create mode 100644 tests/validation/CMakeLists.txt create mode 100644 tests/validation/Datasets.h create mode 100644 tests/validation/FixedPoint.h create mode 100644 tests/validation/Helpers.h create mode 100644 tests/validation/NEON/AbsoluteDifference.cpp create mode 100644 tests/validation/NEON/Accumulate.cpp create mode 100644 tests/validation/NEON/AccumulateSquared.cpp create mode 100644 tests/validation/NEON/AccumulateWeighted.cpp create mode 100644 tests/validation/NEON/ActivationLayer.cpp create mode 100644 tests/validation/NEON/ArithmeticAddition.cpp create mode 100644 tests/validation/NEON/ArithmeticSubtraction.cpp create mode 100644 tests/validation/NEON/BatchNormalizationLayer.cpp create mode 100644 tests/validation/NEON/BitwiseAnd.cpp create mode 100644 tests/validation/NEON/BitwiseNot.cpp create mode 100644 tests/validation/NEON/BitwiseOr.cpp create mode 100644 tests/validation/NEON/BitwiseXor.cpp create mode 100644 tests/validation/NEON/Box3x3.cpp create mode 100644 tests/validation/NEON/CMakeLists.txt create mode 100644 tests/validation/NEON/ConvolutionLayer.cpp create mode 100644 tests/validation/NEON/ConvolutionLayerDirect.cpp create mode 100644 tests/validation/NEON/DepthConvert.cpp create mode 100644 tests/validation/NEON/FillBorder.cpp create mode 100644 tests/validation/NEON/Fixedpoint/Exp_QS8.cpp create mode 100644 tests/validation/NEON/Fixedpoint/Invsqrt_QS8.cpp create mode 100644 tests/validation/NEON/Fixedpoint/Log_QS8.cpp create mode 100644 
tests/validation/NEON/Fixedpoint/Reciprocal_QS8.cpp create mode 100644 tests/validation/NEON/FullyConnectedLayer.cpp create mode 100644 tests/validation/NEON/GEMM.cpp create mode 100644 tests/validation/NEON/IntegralImage.cpp create mode 100644 tests/validation/NEON/NormalizationLayer.cpp create mode 100644 tests/validation/NEON/PixelWiseMultiplication.cpp create mode 100644 tests/validation/NEON/Pooling/PoolingLayer.cpp create mode 100644 tests/validation/NEON/SoftmaxLayer.cpp create mode 100644 tests/validation/NEON/Threshold.cpp create mode 100644 tests/validation/Reference.cpp create mode 100644 tests/validation/Reference.h create mode 100644 tests/validation/ReferenceCPP.cpp create mode 100644 tests/validation/ReferenceCPP.h create mode 100644 tests/validation/Tensor.h create mode 100644 tests/validation/TensorFactory.h create mode 100644 tests/validation/TensorOperations.h create mode 100644 tests/validation/TensorVisitors.h create mode 100644 tests/validation/UNIT/CMakeLists.txt create mode 100644 tests/validation/UNIT/FixedPoint.cpp create mode 100644 tests/validation/UNIT/TensorInfo.cpp create mode 100644 tests/validation/UNIT/TensorShape.cpp create mode 100644 tests/validation/UNIT/Utils.cpp create mode 100644 tests/validation/Validation.cpp create mode 100644 tests/validation/Validation.h create mode 100644 tests/validation/ValidationProgramOptions.cpp create mode 100644 tests/validation/ValidationProgramOptions.h create mode 100644 tests/validation/ValidationUserConfiguration.h create mode 100644 tests/validation/main.cpp create mode 100644 tests/validation/system_tests/CL/AlexNet.cpp create mode 100644 tests/validation/system_tests/CL/LeNet5.cpp create mode 100644 tests/validation/system_tests/NEON/AlexNet.cpp create mode 100644 tests/validation/system_tests/NEON/LeNet5.cpp create mode 100644 utils/Utils.cpp create mode 100644 utils/Utils.h diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000000..01a1f0eadc --- /dev/null +++ 
b/.clang-format @@ -0,0 +1,48 @@ +--- +Language: Cpp +AccessModifierOffset: '0' +AlignAfterOpenBracket: Align +AlignConsecutiveAssignments: 'true' +AlignConsecutiveDeclarations: 'true' +AlignEscapedNewlinesLeft: 'true' +AlignTrailingComments: 'true' +AllowShortBlocksOnASingleLine: 'false' +AllowShortCaseLabelsOnASingleLine: 'false' +AllowShortFunctionsOnASingleLine: None +AllowShortIfStatementsOnASingleLine: 'false' +AllowShortLoopsOnASingleLine: 'false' +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: 'true' +AlwaysBreakTemplateDeclarations: 'true' +BreakBeforeBinaryOperators: NonAssignment +BreakBeforeBraces: Allman +BreakBeforeTernaryOperators: 'false' +BreakConstructorInitializersBeforeComma: 'false' +#BreakStringLiterals: 'true' +ConstructorInitializerAllOnOneLineOrOnePerLine: 'true' +Cpp11BracedListStyle: 'false' +DerivePointerAlignment: 'false' +IndentCaseLabels: 'true' +IndentWidth: '4' +IndentWrappedFunctionNames: 'false' +KeepEmptyLinesAtTheStartOfBlocks: 'false' +MaxEmptyLinesToKeep: '1' +NamespaceIndentation: None +PointerAlignment: Right +SortIncludes: 'true' +SpaceAfterCStyleCast: 'false' +SpaceBeforeAssignmentOperators: 'true' +SpaceBeforeParens: Never +SpaceInEmptyParentheses: 'false' +SpacesInAngles: 'false' +SpacesInCStyleCastParentheses: 'false' +SpacesInParentheses: 'false' +SpacesInSquareBrackets: 'false' +Standard: Cpp11 +TabWidth: '4' +UseTab: Never +ReflowComments: 'false' +ContinuationIndentWidth: '4' +ColumnLimit: 0 +--- diff --git a/.clang-tidy b/.clang-tidy new file mode 100644 index 0000000000..7ee4757259 --- /dev/null +++ b/.clang-tidy @@ -0,0 +1,410 @@ +--- +Checks: 
'clang-diagnostic-*,clang-analyzer-*,*,-cppcoreguidelines-pro-bounds-pointer-arithmetic,-cppcoreguidelines-pro-bounds-array-to-pointer-decay,-cppcoreguidelines-pro-bounds-constant-array-index,-cert-err58-cpp,-cppcoreguidelines-pro-type-reinterpret-cast,-google-runtime-references,-google-build-using-namespace,-readability-redundant-member-init,-readability-redundant-declaration,-readability-else-after-return,-performance-type-promotion-in-math-fn,-cert-err60-cpp' +WarningsAsErrors: '' +HeaderFilterRegex: '' +AnalyzeTemporaryDtors: false +CheckOptions: + - key: cert-dcl59-cpp.HeaderFileExtensions + value: h,hh,hpp,hxx + - key: cert-err09-cpp.CheckThrowTemporaries + value: '1' + - key: cert-err61-cpp.CheckThrowTemporaries + value: '1' + - key: cert-oop11-cpp.IncludeStyle + value: llvm + - key: cppcoreguidelines-pro-bounds-constant-array-index.GslHeader + value: '' + - key: cppcoreguidelines-pro-bounds-constant-array-index.IncludeStyle + value: '0' + - key: cppcoreguidelines-pro-type-member-init.IgnoreArrays + value: '0' + - key: google-build-namespaces.HeaderFileExtensions + value: h,hh,hpp,hxx + - key: google-global-names-in-headers.HeaderFileExtensions + value: h + - key: google-readability-braces-around-statements.ShortStatementLines + value: '1' + - key: google-readability-function-size.BranchThreshold + value: '4294967295' + - key: google-readability-function-size.LineThreshold + value: '4294967295' + - key: google-readability-function-size.StatementThreshold + value: '800' + - key: google-readability-namespace-comments.ShortNamespaceLines + value: '10' + - key: google-readability-namespace-comments.SpacesBeforeComments + value: '2' + - key: google-runtime-int.SignedTypePrefix + value: int + - key: google-runtime-int.TypeSuffix + value: '' + - key: google-runtime-int.UnsignedTypePrefix + value: uint + - key: google-runtime-references.WhiteListTypes + value: '' + - key: llvm-header-guard.HeaderFileExtensions + value: ',h,hh,hpp,hxx' + - key: 
llvm-namespace-comment.ShortNamespaceLines + value: '1' + - key: llvm-namespace-comment.SpacesBeforeComments + value: '1' + - key: misc-argument-comment.StrictMode + value: '0' + - key: misc-assert-side-effect.AssertMacros + value: assert + - key: misc-assert-side-effect.CheckFunctionCalls + value: '0' + - key: misc-dangling-handle.HandleClasses + value: 'std::basic_string_view;std::experimental::basic_string_view' + - key: misc-definitions-in-headers.HeaderFileExtensions + value: ',h,hh,hpp,hxx' + - key: misc-definitions-in-headers.UseHeaderFileExtension + value: '1' + - key: misc-misplaced-widening-cast.CheckImplicitCasts + value: '0' + - key: misc-move-constructor-init.IncludeStyle + value: llvm + - key: misc-sizeof-expression.WarnOnSizeOfCompareToConstant + value: '1' + - key: misc-sizeof-expression.WarnOnSizeOfConstant + value: '1' + - key: misc-sizeof-expression.WarnOnSizeOfThis + value: '1' + - key: misc-string-constructor.LargeLengthThreshold + value: '8388608' + - key: misc-string-constructor.WarnOnLargeLength + value: '1' + - key: misc-suspicious-enum-usage.StrictMode + value: '0' + - key: misc-suspicious-missing-comma.MaxConcatenatedTokens + value: '5' + - key: misc-suspicious-missing-comma.RatioThreshold + value: '0.200000' + - key: misc-suspicious-missing-comma.SizeThreshold + value: '5' + - key: misc-suspicious-string-compare.StringCompareLikeFunctions + value: '' + - key: misc-suspicious-string-compare.WarnOnImplicitComparison + value: '1' + - key: misc-suspicious-string-compare.WarnOnLogicalNotComparison + value: '0' + - key: misc-throw-by-value-catch-by-reference.CheckThrowTemporaries + value: '1' + - key: modernize-loop-convert.MaxCopySize + value: '16' + - key: modernize-loop-convert.MinConfidence + value: reasonable + - key: modernize-loop-convert.NamingStyle + value: CamelCase + - key: modernize-pass-by-value.IncludeStyle + value: llvm + - key: modernize-pass-by-value.ValuesOnly + value: '0' + - key: modernize-replace-auto-ptr.IncludeStyle + 
value: llvm + - key: modernize-use-auto.RemoveStars + value: '0' + - key: modernize-use-default-member-init.UseAssignment + value: '0' + - key: modernize-use-emplace.ContainersWithPushBack + value: '::std::vector;::std::list;::std::deque' + - key: modernize-use-emplace.SmartPointers + value: '::std::shared_ptr;::std::unique_ptr;::std::auto_ptr;::std::weak_ptr' + - key: modernize-use-nullptr.NullMacros + value: 'NULL' + - key: modernize-use-transparent-functors.SafeMode + value: '0' + - key: performance-faster-string-find.StringLikeClasses + value: 'std::basic_string' + - key: performance-for-range-copy.WarnOnAllAutoCopies + value: '0' + - key: performance-inefficient-string-concatenation.StrictMode + value: '0' + - key: performance-type-promotion-in-math-fn.IncludeStyle + value: llvm + - key: performance-unnecessary-value-param.IncludeStyle + value: llvm + - key: readability-braces-around-statements.ShortStatementLines + value: '0' + - key: readability-function-size.BranchThreshold + value: '4294967295' + - key: readability-function-size.LineThreshold + value: '4294967295' + - key: readability-function-size.StatementThreshold + value: '800' + - key: readability-identifier-naming.AbstractClassCase + value: aNy_CasE + - key: readability-identifier-naming.AbstractClassPrefix + value: '' + - key: readability-identifier-naming.AbstractClassSuffix + value: '' + - key: readability-identifier-naming.ClassCase + value: aNy_CasE + - key: readability-identifier-naming.ClassConstantCase + value: aNy_CasE + - key: readability-identifier-naming.ClassConstantPrefix + value: '' + - key: readability-identifier-naming.ClassConstantSuffix + value: '' + - key: readability-identifier-naming.ClassMemberCase + value: aNy_CasE + - key: readability-identifier-naming.ClassMemberPrefix + value: '' + - key: readability-identifier-naming.ClassMemberSuffix + value: '' + - key: readability-identifier-naming.ClassMethodCase + value: aNy_CasE + - key: 
readability-identifier-naming.ClassMethodPrefix + value: '' + - key: readability-identifier-naming.ClassMethodSuffix + value: '' + - key: readability-identifier-naming.ClassPrefix + value: '' + - key: readability-identifier-naming.ClassSuffix + value: '' + - key: readability-identifier-naming.ConstantCase + value: aNy_CasE + - key: readability-identifier-naming.ConstantMemberCase + value: aNy_CasE + - key: readability-identifier-naming.ConstantMemberPrefix + value: '' + - key: readability-identifier-naming.ConstantMemberSuffix + value: '' + - key: readability-identifier-naming.ConstantParameterCase + value: aNy_CasE + - key: readability-identifier-naming.ConstantParameterPrefix + value: '' + - key: readability-identifier-naming.ConstantParameterSuffix + value: '' + - key: readability-identifier-naming.ConstantPrefix + value: '' + - key: readability-identifier-naming.ConstantSuffix + value: '' + - key: readability-identifier-naming.ConstexprFunctionCase + value: aNy_CasE + - key: readability-identifier-naming.ConstexprFunctionPrefix + value: '' + - key: readability-identifier-naming.ConstexprFunctionSuffix + value: '' + - key: readability-identifier-naming.ConstexprMethodCase + value: aNy_CasE + - key: readability-identifier-naming.ConstexprMethodPrefix + value: '' + - key: readability-identifier-naming.ConstexprMethodSuffix + value: '' + - key: readability-identifier-naming.ConstexprVariableCase + value: aNy_CasE + - key: readability-identifier-naming.ConstexprVariablePrefix + value: '' + - key: readability-identifier-naming.ConstexprVariableSuffix + value: '' + - key: readability-identifier-naming.EnumCase + value: aNy_CasE + - key: readability-identifier-naming.EnumConstantCase + value: aNy_CasE + - key: readability-identifier-naming.EnumConstantPrefix + value: '' + - key: readability-identifier-naming.EnumConstantSuffix + value: '' + - key: readability-identifier-naming.EnumPrefix + value: '' + - key: readability-identifier-naming.EnumSuffix + value: '' + - key: 
readability-identifier-naming.FunctionCase + value: aNy_CasE + - key: readability-identifier-naming.FunctionPrefix + value: '' + - key: readability-identifier-naming.FunctionSuffix + value: '' + - key: readability-identifier-naming.GlobalConstantCase + value: aNy_CasE + - key: readability-identifier-naming.GlobalConstantPrefix + value: '' + - key: readability-identifier-naming.GlobalConstantSuffix + value: '' + - key: readability-identifier-naming.GlobalFunctionCase + value: aNy_CasE + - key: readability-identifier-naming.GlobalFunctionPrefix + value: '' + - key: readability-identifier-naming.GlobalFunctionSuffix + value: '' + - key: readability-identifier-naming.GlobalVariableCase + value: aNy_CasE + - key: readability-identifier-naming.GlobalVariablePrefix + value: '' + - key: readability-identifier-naming.GlobalVariableSuffix + value: '' + - key: readability-identifier-naming.IgnoreFailedSplit + value: '0' + - key: readability-identifier-naming.InlineNamespaceCase + value: aNy_CasE + - key: readability-identifier-naming.InlineNamespacePrefix + value: '' + - key: readability-identifier-naming.InlineNamespaceSuffix + value: '' + - key: readability-identifier-naming.LocalConstantCase + value: aNy_CasE + - key: readability-identifier-naming.LocalConstantPrefix + value: '' + - key: readability-identifier-naming.LocalConstantSuffix + value: '' + - key: readability-identifier-naming.LocalVariableCase + value: aNy_CasE + - key: readability-identifier-naming.LocalVariablePrefix + value: '' + - key: readability-identifier-naming.LocalVariableSuffix + value: '' + - key: readability-identifier-naming.MacroDefinitionCase + value: aNy_CasE + - key: readability-identifier-naming.MacroDefinitionPrefix + value: '' + - key: readability-identifier-naming.MacroDefinitionSuffix + value: '' + - key: readability-identifier-naming.MemberCase + value: aNy_CasE + - key: readability-identifier-naming.MemberPrefix + value: '' + - key: readability-identifier-naming.MemberSuffix + value: '' 
+ - key: readability-identifier-naming.MethodCase + value: aNy_CasE + - key: readability-identifier-naming.MethodPrefix + value: '' + - key: readability-identifier-naming.MethodSuffix + value: '' + - key: readability-identifier-naming.NamespaceCase + value: aNy_CasE + - key: readability-identifier-naming.NamespacePrefix + value: '' + - key: readability-identifier-naming.NamespaceSuffix + value: '' + - key: readability-identifier-naming.ParameterCase + value: aNy_CasE + - key: readability-identifier-naming.ParameterPackCase + value: aNy_CasE + - key: readability-identifier-naming.ParameterPackPrefix + value: '' + - key: readability-identifier-naming.ParameterPackSuffix + value: '' + - key: readability-identifier-naming.ParameterPrefix + value: '' + - key: readability-identifier-naming.ParameterSuffix + value: '' + - key: readability-identifier-naming.PrivateMemberCase + value: aNy_CasE + - key: readability-identifier-naming.PrivateMemberPrefix + value: '' + - key: readability-identifier-naming.PrivateMemberSuffix + value: '' + - key: readability-identifier-naming.PrivateMethodCase + value: aNy_CasE + - key: readability-identifier-naming.PrivateMethodPrefix + value: '' + - key: readability-identifier-naming.PrivateMethodSuffix + value: '' + - key: readability-identifier-naming.ProtectedMemberCase + value: aNy_CasE + - key: readability-identifier-naming.ProtectedMemberPrefix + value: '' + - key: readability-identifier-naming.ProtectedMemberSuffix + value: '' + - key: readability-identifier-naming.ProtectedMethodCase + value: aNy_CasE + - key: readability-identifier-naming.ProtectedMethodPrefix + value: '' + - key: readability-identifier-naming.ProtectedMethodSuffix + value: '' + - key: readability-identifier-naming.PublicMemberCase + value: aNy_CasE + - key: readability-identifier-naming.PublicMemberPrefix + value: '' + - key: readability-identifier-naming.PublicMemberSuffix + value: '' + - key: readability-identifier-naming.PublicMethodCase + value: aNy_CasE + - key: 
readability-identifier-naming.PublicMethodPrefix + value: '' + - key: readability-identifier-naming.PublicMethodSuffix + value: '' + - key: readability-identifier-naming.StaticConstantCase + value: aNy_CasE + - key: readability-identifier-naming.StaticConstantPrefix + value: '' + - key: readability-identifier-naming.StaticConstantSuffix + value: '' + - key: readability-identifier-naming.StaticVariableCase + value: aNy_CasE + - key: readability-identifier-naming.StaticVariablePrefix + value: '' + - key: readability-identifier-naming.StaticVariableSuffix + value: '' + - key: readability-identifier-naming.StructCase + value: aNy_CasE + - key: readability-identifier-naming.StructPrefix + value: '' + - key: readability-identifier-naming.StructSuffix + value: '' + - key: readability-identifier-naming.TemplateParameterCase + value: aNy_CasE + - key: readability-identifier-naming.TemplateParameterPrefix + value: '' + - key: readability-identifier-naming.TemplateParameterSuffix + value: '' + - key: readability-identifier-naming.TemplateTemplateParameterCase + value: aNy_CasE + - key: readability-identifier-naming.TemplateTemplateParameterPrefix + value: '' + - key: readability-identifier-naming.TemplateTemplateParameterSuffix + value: '' + - key: readability-identifier-naming.TypeAliasCase + value: aNy_CasE + - key: readability-identifier-naming.TypeAliasPrefix + value: '' + - key: readability-identifier-naming.TypeAliasSuffix + value: '' + - key: readability-identifier-naming.TypeTemplateParameterCase + value: aNy_CasE + - key: readability-identifier-naming.TypeTemplateParameterPrefix + value: '' + - key: readability-identifier-naming.TypeTemplateParameterSuffix + value: '' + - key: readability-identifier-naming.TypedefCase + value: aNy_CasE + - key: readability-identifier-naming.TypedefPrefix + value: '' + - key: readability-identifier-naming.TypedefSuffix + value: '' + - key: readability-identifier-naming.UnionCase + value: aNy_CasE + - key: 
readability-identifier-naming.UnionPrefix + value: '' + - key: readability-identifier-naming.UnionSuffix + value: '' + - key: readability-identifier-naming.ValueTemplateParameterCase + value: aNy_CasE + - key: readability-identifier-naming.ValueTemplateParameterPrefix + value: '' + - key: readability-identifier-naming.ValueTemplateParameterSuffix + value: '' + - key: readability-identifier-naming.VariableCase + value: aNy_CasE + - key: readability-identifier-naming.VariablePrefix + value: '' + - key: readability-identifier-naming.VariableSuffix + value: '' + - key: readability-identifier-naming.VirtualMethodCase + value: aNy_CasE + - key: readability-identifier-naming.VirtualMethodPrefix + value: '' + - key: readability-identifier-naming.VirtualMethodSuffix + value: '' + - key: readability-implicit-bool-cast.AllowConditionalIntegerCasts + value: '0' + - key: readability-implicit-bool-cast.AllowConditionalPointerCasts + value: '0' + - key: readability-simplify-boolean-expr.ChainedConditionalAssignment + value: '0' + - key: readability-simplify-boolean-expr.ChainedConditionalReturn + value: '0' +... + diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000000..a15c40f0d1 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,8 @@ +[submodule "data"] + path = data + url = ../data + branch = master +[submodule "3rdparty"] + path = 3rdparty + url = ../3rdparty + branch = master diff --git a/3rdparty b/3rdparty new file mode 160000 index 0000000000..e2e08d6264 --- /dev/null +++ b/3rdparty @@ -0,0 +1 @@ +Subproject commit e2e08d62647278f63993b43dd84d16484f111a7a diff --git a/SConscript b/SConscript new file mode 100644 index 0000000000..970466e5ae --- /dev/null +++ b/SConscript @@ -0,0 +1,199 @@ +# Copyright (c) 2016, 2017 ARM Limited. 
+# +# SPDX-License-Identifier: MIT +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+import collections +import os.path +import re +import subprocess + +VERSION = "v0.0-unreleased" +SONAME_VERSION="1.0.0" + +Import('env') +Import('vars') + +def build_library(name, sources, static=False): + if static: + obj = arm_compute_env.StaticLibrary(name, source=sources) + else: + if env['set_soname']: + obj = arm_compute_env.SharedLibrary(name, source=sources, SHLIBVERSION = SONAME_VERSION) + + symlinks = [] + # Manually delete symlinks or SCons will get confused: + directory = os.path.dirname(obj[0].path) + library_prefix = obj[0].path[:-(1 + len(SONAME_VERSION))] + real_lib = "%s.%s" % (library_prefix, SONAME_VERSION) + + for f in Glob("#%s*" % library_prefix): + if str(f) != real_lib: + symlinks.append("%s/%s" % (directory,str(f))) + + clean = arm_compute_env.Command('clean-%s' % str(obj[0]), [], Delete(symlinks)) + Default(clean) + Depends(obj, clean) + else: + obj = arm_compute_env.SharedLibrary(name, source=sources) + + Default(obj) + return obj + +def resolve_includes(target, source, env): + # File collection + FileEntry = collections.namedtuple('FileEntry', 'target_name file_contents') + + # Include pattern + pattern = re.compile("#include \"(.*)\"") + + # Get file contents + files = [] + for i in range(len(source)): + src = source[i] + dst = target[i] + contents = src.get_contents().splitlines() + entry = FileEntry(target_name=dst, file_contents=contents) + files.append((os.path.basename(src.get_path()),entry)) + + # Create dictionary of tupled list + files_dict = dict(files) + + # Check for includes (can only be files in the same folder) + final_files = [] + for file in files: + done = False + tmp_file = file[1].file_contents + while not done: + file_count = 0 + updated_file = [] + for line in tmp_file: + found = pattern.search(line) + if found: + include_file = found.group(1) + data = files_dict[include_file].file_contents + updated_file.extend(data) + else: + updated_file.append(line) + file_count += 1 + + # Check if all include are replaced. 
+ if file_count == len(tmp_file): + done = True + + # Update temp file + tmp_file = updated_file + + # Append and prepend string literal identifiers and add expanded file to final list + tmp_file.insert(0, "R\"(\n") + tmp_file.append("\n)\"") + entry = FileEntry(target_name=file[1].target_name, file_contents=tmp_file) + final_files.append((file[0], entry)) + + # Write output files + for file in final_files: + with open(file[1].target_name.get_path(), 'w+') as out_file: + out_file.write( "\n".join( file[1].file_contents )) + +def create_version_file(target, source, env): +# Generate string with build options library version to embed in the library: + try: + git_hash = subprocess.check_output(["git", "rev-parse", "HEAD"]) + except (OSError, subprocess.CalledProcessError): + git_hash="unknown" + + version_filename = "%s/arm_compute_version.embed" % Dir("src/core").path + build_info = "\"arm_compute_version=%s Build options: %s Git hash=%s\"" % (VERSION, vars.args, git_hash.strip()) + with open(target[0].get_path(), "w") as fd: + fd.write(build_info) + + +arm_compute_env = env.Clone() + +generate_embed = [ arm_compute_env.Command("src/core/arm_compute_version.embed", "", action=create_version_file) ] +arm_compute_env.Append(CPPPATH =[Dir("./src/core/").path] ) + +if env["os"] not in ["android", "bare_metal"]: + arm_compute_env.Append(LIBS = ['pthread']) + +arm_compute_env.Append(LIBS = ['dl']) + +core_files = Glob('src/core/*.cpp') +core_files += Glob('src/core/CPP/*.cpp') +core_files += Glob('src/core/CPP/kernels/*.cpp') + +files = Glob('src/runtime/*.cpp') +# CLHarrisCorners uses the Scheduler to run CPP kernels +files += Glob('src/runtime/CPP/SingleThreadScheduler.cpp') + +if env['cppthreads']: + files += Glob('src/runtime/CPP/CPPScheduler.cpp') + +if env['openmp']: + files += Glob('src/runtime/OMP/OMPScheduler.cpp') + +if env['opencl']: + core_files += Glob('src/core/CL/*.cpp') + core_files += Glob('src/core/CL/kernels/*.cpp') + + files += 
Glob('src/runtime/CL/*.cpp') + files += Glob('src/runtime/CL/functions/*.cpp') + + # Generate embed files + if env['embed_kernels']: + cl_files = Glob('src/core/CL/cl_kernels/*.cl') + cl_files += Glob('src/core/CL/cl_kernels/*.h') + + embed_files = [ f.get_path()+"embed" for f in cl_files ] + arm_compute_env.Append(CPPPATH =[Dir("./src/core/CL/").path] ) + + generate_embed.append(arm_compute_env.Command(embed_files, cl_files, action=resolve_includes)) + +if env['neon']: + core_files += Glob('src/core/NEON/*.cpp') + core_files += Glob('src/core/NEON/kernels/*.cpp') + + files += Glob('src/runtime/NEON/*.cpp') + files += Glob('src/runtime/NEON/functions/*.cpp') + +static_core_objects = [arm_compute_env.StaticObject(f) for f in core_files] +shared_core_objects = [arm_compute_env.SharedObject(f) for f in core_files] + +arm_compute_core_a = build_library('arm_compute_core-static', static_core_objects, static=True) +Export('arm_compute_core_a') + +if env['os'] != 'bare_metal': + arm_compute_core_so = build_library('arm_compute_core', shared_core_objects, static=False) + Export('arm_compute_core_so') + +shared_objects = [arm_compute_env.SharedObject(f) for f in files] +static_objects = [arm_compute_env.StaticObject(f) for f in files] + +arm_compute_a = build_library('arm_compute-static', static_core_objects + static_objects, static=True) +Export('arm_compute_a') + +if env['os'] != 'bare_metal': + arm_compute_so = build_library('arm_compute', shared_core_objects + shared_objects, static=False) + Export('arm_compute_so') + +alias = arm_compute_env.Alias("arm_compute", [arm_compute_a, arm_compute_so]) +Default(alias) + +Default(generate_embed) +Depends([alias,arm_compute_core_so, arm_compute_core_a], generate_embed) diff --git a/SConstruct b/SConstruct new file mode 100644 index 0000000000..3927e3acc9 --- /dev/null +++ b/SConstruct @@ -0,0 +1,208 @@ +# Copyright (c) 2016, 2017 ARM Limited. 
+# +# SPDX-License-Identifier: MIT +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
# SConstruct: top-level SCons build description for the Compute Library.
# NOTE(review): reconstructed from a line-mangled patch; verify against the
# original commit before relying on exact flag ordering.

import SCons
import os
import subprocess

def version_at_least(version, required):
    # Return True if dotted version string `version` >= `required`.
    #
    # Components are compared numerically, which fixes the original
    # character-wise comparison that mis-ordered multi-digit components
    # (it considered "4.10" older than "4.9" and broke for GCC "10").
    # Components beyond the shorter string are ignored, preserving the
    # original behaviour ("2.4" satisfies a "2.4.1" requirement).
    for v, r in zip(str(version).split('.'), str(required).split('.')):
        if int(v) < int(r):
            return False
        elif int(v) > int(r):
            return True

    return True

vars = Variables("scons")
vars.AddVariables(
    BoolVariable("debug", "Debug", False),
    BoolVariable("asserts", "Enable asserts (this flag is forced to 1 for debug=1)", False),
    EnumVariable("arch", "Target Architecture", "armv7a", allowed_values=("armv7a", "arm64-v8a", "arm64-v8.2-a", "x86_32", "x86_64")),
    EnumVariable("os", "Target OS", "linux", allowed_values=("linux", "android", "bare_metal")),
    EnumVariable("build", "Build type", "cross_compile", allowed_values=("native", "cross_compile")),
    BoolVariable("examples", "Build example programs", True),
    BoolVariable("Werror", "Enable/disable the -Werror compilation flag", True),
    BoolVariable("opencl", "Enable OpenCL support", True),
    BoolVariable("neon", "Enable Neon support", False),
    BoolVariable("embed_kernels", "Embed OpenCL kernels in library binary", False),
    BoolVariable("set_soname", "Set the library's soname and shlibversion (requires SCons 2.4 or above)", False),
    BoolVariable("openmp", "Enable OpenMP backend", False),
    BoolVariable("cppthreads", "Enable C++11 threads backend", True),
    PathVariable("build_dir", "Specify sub-folder for the build", ".", PathVariable.PathAccept),
    ("extra_cxx_flags", "Extra CXX flags to be appended to the build command", "")
)

env = Environment(platform="posix", variables=vars, ENV=os.environ)

SConsignFile('build/.%s' % env['build_dir'])

Help(vars.GenerateHelpText(env))

# --- Option sanity checks ---------------------------------------------------
# print() is used consistently below: the original mixed Python-2 print
# statements with print() calls; the function form works on both.
if env['neon'] and 'x86' in env['arch']:
    print("Cannot compile NEON for x86")
    Exit(1)

if env['set_soname'] and not version_at_least(SCons.__version__, "2.4"):
    print("Setting the library's SONAME / SHLIBVERSION requires SCons 2.4 or above")
    print("Update your version of SCons or use set_soname=0")
    Exit(1)

if env['os'] == 'bare_metal':
    if env['cppthreads'] or env['openmp']:
        print("ERROR: OpenMP and C++11 threads not supported in bare_metal. Use cppthreads=0 openmp=0")
        Exit(1)

# --- Common warning / language flags ----------------------------------------
env.Append(CXXFLAGS = ['-Wno-deprecated-declarations','-Wall','-DARCH_ARM',
         '-Wextra','-Wno-unused-parameter','-pedantic','-Wdisabled-optimization','-Wformat=2',
         '-Winit-self','-Wstrict-overflow=2','-Wswitch-default',
         '-fpermissive','-std=gnu++11','-Wno-vla','-Woverloaded-virtual',
         '-Wctor-dtor-privacy','-Wsign-promo','-Weffc++','-Wno-format-nonliteral','-Wno-overlength-strings','-Wno-strict-overflow'])
env.Append(CPPDEFINES = ['_GLIBCXX_USE_NANOSLEEP'])

if os.environ.get('CXX', 'g++') == 'clang++':
    env.Append(CXXFLAGS = ['-Wno-format-nonliteral','-Wno-deprecated-increment-bool','-Wno-vla-extension','-Wno-mismatched-tags'])
else:
    env.Append(CXXFLAGS = ['-Wlogical-op','-Wnoexcept','-Wstrict-null-sentinel'])

if env['cppthreads']:
    env.Append(CPPDEFINES = [('ARM_COMPUTE_CPP_SCHEDULER', 1)])

if env['openmp']:
    if os.environ.get('CXX', 'g++') == 'clang++':
        print("Clang does not support OpenMP. Use scheduler=cpp.")
        Exit(1)

    env.Append(CPPDEFINES = [('ARM_COMPUTE_OPENMP_SCHEDULER', 1)])
    env.Append(CXXFLAGS = ['-fopenmp'])
    env.Append(LINKFLAGS = ['-fopenmp'])

# --- Cross-compilation prefix and per-architecture flags --------------------
prefix = ""
if env['arch'] == 'armv7a':
    env.Append(CXXFLAGS = ['-march=armv7-a', '-mthumb', '-mfpu=neon'])

    if env['os'] in ['linux', 'bare_metal']:
        prefix = "arm-linux-gnueabihf-"
        env.Append(CXXFLAGS = ['-mfloat-abi=hard'])
    elif env['os'] == 'android':
        prefix = "arm-linux-androideabi-"
        env.Append(CXXFLAGS = ['-mfloat-abi=softfp'])
elif env['arch'] == 'arm64-v8a':
    env.Append(CXXFLAGS = ['-march=armv8-a'])

    if env['os'] in ['linux', 'bare_metal']:
        prefix = "aarch64-linux-gnu-"
    elif env['os'] == 'android':
        prefix = "aarch64-linux-android-"
elif env['arch'] == 'arm64-v8.2-a':
    env.Append(CXXFLAGS = ['-march=armv8.2-a+fp16+simd'])
    env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_FP16'])

    if env['os'] in ['linux', 'bare_metal']:
        prefix = "aarch64-linux-gnu-"
    elif env['os'] == 'android':
        prefix = "aarch64-linux-android-"
elif env['arch'] == 'x86_32':
    env.Append(CCFLAGS = ['-m32'])
    env.Append(LINKFLAGS = ['-m32'])
elif env['arch'] == 'x86_64':
    env.Append(CCFLAGS = ['-m64'])
    env.Append(LINKFLAGS = ['-m64'])

# Native builds use the host toolchain directly.
if env['build'] == 'native':
    prefix = ""

env['CC'] = prefix + os.environ.get('CC', 'gcc')
env['CXX'] = prefix + os.environ.get('CXX', 'g++')
env['LD'] = prefix + "ld"
env['AS'] = prefix + "as"
env['AR'] = prefix + "ar"
env['RANLIB'] = prefix + "ranlib"

# --- Compiler availability / version checks (skipped for `scons --help`) ----
if not GetOption("help"):
    try:
        compiler_ver = subprocess.check_output([env['CXX'], "-dumpversion"]).strip()
    except OSError:
        print("ERROR: Compiler '%s' not found" % env['CXX'])
        Exit(1)

    if os.environ.get('CXX', 'g++') == 'g++':
        if env['arch'] == 'arm64-v8.2-a' and not version_at_least(compiler_ver, '6.2.1'):
            print("GCC 6.2.1 or newer is required to compile armv8.2-a code")
            Exit(1)
        elif env['arch'] == 'arm64-v8a' and not version_at_least(compiler_ver, '4.9'):
            print("GCC 4.9 or newer is required to compile NEON code for AArch64")
            Exit(1)

        # NOTE(review): the mangled patch makes the nesting of the two checks
        # below ambiguous; they are GCC-version-specific workarounds, so they
        # are kept inside the g++ branch. Confirm against the original commit.
        if version_at_least(compiler_ver, '6.1'):
            env.Append(CXXFLAGS = ['-Wno-ignored-attributes'])

        if compiler_ver == '4.8.3':
            env.Append(CXXFLAGS = ['-Wno-array-bounds'])

if env['Werror']:
    env.Append(CXXFLAGS = ['-Werror'])

if env['os'] == 'android':
    env.Append(CPPDEFINES = ['ANDROID'])
    env.Append(LINKFLAGS = ['-pie', '-static-libstdc++'])
elif env['os'] == 'bare_metal':
    env.Append(LINKFLAGS = ['-static'])
    env.Append(CXXFLAGS = ['-fPIC'])
    env.Append(CPPDEFINES = ['NO_MULTI_THREADING'])

if env['opencl']:
    if env['os'] == 'bare_metal':
        print("Cannot link OpenCL statically, which is required on bare metal")
        Exit(1)

    if env['embed_kernels']:
        env.Append(CPPDEFINES = ['EMBEDDED_KERNELS'])

if env['debug']:
    # debug implies asserts.
    env['asserts'] = True
    env.Append(CXXFLAGS = ['-O0','-g','-gdwarf-2'])
    env.Append(CPPDEFINES = ['ARM_COMPUTE_DEBUG_ENABLED'])
else:
    env.Append(CXXFLAGS = ['-O3','-ftree-vectorize'])

if env['asserts']:
    env.Append(CPPDEFINES = ['ARM_COMPUTE_ASSERTS_ENABLED'])

env.Append(CPPPATH = ['#/include', "#"])
env.Append(CXXFLAGS = env['extra_cxx_flags'])

Export('vars')
Export('env')
Export('version_at_least')

SConscript('./SConscript', variant_dir='#build/%s' % env['build_dir'], duplicate=0)

if env['opencl']:
    SConscript("./opencl-1.2-stubs/SConscript", variant_dir="build/%s/opencl-1.2-stubs" % env['build_dir'], duplicate=0)

if env['examples']:
    SConscript('./examples/SConscript', variant_dir='#build/%s/examples' % env['build_dir'], duplicate=0)

SConscript('./tests/SConscript', variant_dir='#build/%s/tests' % env['build_dir'], duplicate=0)
/*
 * Copyright (c) 2017 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef __ARM_COMPUTE_ACCESS_WINDOW_AUTO_PADDING_H__
#define __ARM_COMPUTE_ACCESS_WINDOW_AUTO_PADDING_H__

#include "arm_compute/core/Coordinates.h"
#include "arm_compute/core/IAccessWindow.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"

namespace arm_compute
{
class Window;
class ITensorInfo;

/** Dummy access window.
 *
 * This implementation always uses the auto padding of the tensor info and
 * never updates the window. The valid region is always set to cover the entire
 * tensor.
 *
 * @note This access window is only used during the migration to the new
 * padding system. It will be removed once all kernels have been ported.
 *
 * */
class AccessWindowAutoPadding : public IAccessWindow
{
public:
    /** Default constructor.
     *
     * @param[in,out] info Tensor info of the accessed kernel.
     */
    AccessWindowAutoPadding(ITensorInfo *info);
    /** Prevent instances of this class from being copied (it holds a raw pointer to the tensor info). */
    AccessWindowAutoPadding(const AccessWindowAutoPadding &) = delete;
    /** Prevent instances of this class from being copy assigned. */
    AccessWindowAutoPadding &operator=(const AccessWindowAutoPadding &) = delete;
    /** Allow instances of this class to be moved. */
    AccessWindowAutoPadding(AccessWindowAutoPadding &&) = default;
    /** Allow instances of this class to be move assigned. */
    AccessWindowAutoPadding &operator=(AccessWindowAutoPadding &&) = default;
    /** Default destructor (does not own the tensor info). */
    ~AccessWindowAutoPadding() = default;

    /** Set the valid region to match the entire tensor. */
    void set_valid_region();

    /** Return a valid region that spans across the entire tensor.
     *
     * @return Valid region covering the whole tensor shape.
     */
    ValidRegion compute_valid_region() const;

    // Inherited methods overridden:
    bool update_window_if_needed(Window &window) const override;
    bool update_padding_if_needed(const Window &window) const override;
    ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) const override;

private:
    ITensorInfo *_info; /**< Raw (non-owning) pointer to the accessed tensor's info. */
};
} // namespace arm_compute
#endif /*__ARM_COMPUTE_ACCESS_WINDOW_AUTO_PADDING_H__*/
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_IACCESS_WINDOW_STATIC_H__ +#define __ARM_COMPUTE_IACCESS_WINDOW_STATIC_H__ + +#include "arm_compute/core/Coordinates.h" +#include "arm_compute/core/IAccessWindow.h" +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" + +#include + +namespace arm_compute +{ +class Window; +class ITensorInfo; + +/** Implementation of a static rectangular access pattern. + * + * In this implementation the access offsets and sizes are not relative to the + * current element. Instead they are considered to be absolute coordinates + * within the accessed tensor's shape. + * + * */ +class AccessWindowStatic : public IAccessWindow +{ +public: + /** Constructor for a static access pattern. + * + * @param[in,out] info Tensor info of the accessed kernel. + * @param[in] start_x Start of the access in X direction. + * @param[in] start_y Start of the access in Y direction. + * @param[in] end_x End of the access in X direction. + * @param[in] end_y End of the access in Y direction. + */ + AccessWindowStatic(ITensorInfo *info, int start_x, int start_y, int end_x, int end_y); + + AccessWindowStatic(const AccessWindowStatic &) = delete; + AccessWindowStatic &operator=(const AccessWindowStatic &) = delete; + AccessWindowStatic(AccessWindowStatic &&) = default; + AccessWindowStatic &operator=(AccessWindowStatic &&) = default; + ~AccessWindowStatic() = default; + + /** Set the valid region based on the static access pattern and valid + * region of the inputs. 
+ * + * @param[in] window Execution window of the kernel. + * @param[in] input_valid_region Combined valid region of all inputs. + */ + void set_valid_region(const Window &window, const ValidRegion &input_valid_region); + + /** Compute the valid region based on the static access pattern and valid region of the inputs. + * + * @param[in] window Execution window of the kernel. + * @param[in] input_valid_region Combined valid region of all inputs. + */ + ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region) const; + + // Inherited methods overriden: + bool update_window_if_needed(Window &window) const override; + bool update_padding_if_needed(const Window &window) const override; + ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) const override; + + ITensorInfo *_info; + int _start_x; + int _start_y; + int _end_x; + int _end_y; +}; +} // namespace arm_compute +#endif /*__ARM_COMPUTE_IACCESS_WINDOW_STATIC_H__*/ diff --git a/arm_compute/core/AccessWindowTranspose.h b/arm_compute/core/AccessWindowTranspose.h new file mode 100644 index 0000000000..102860f9d8 --- /dev/null +++ b/arm_compute/core/AccessWindowTranspose.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_IACCESS_WINDOW_TRANSPOSE_H__ +#define __ARM_COMPUTE_IACCESS_WINDOW_TRANSPOSE_H__ + +#include "arm_compute/core/Coordinates.h" +#include "arm_compute/core/IAccessWindow.h" +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class Window; +class ITensorInfo; + +/** Implementation of a XY-transpose access pattern. */ +class AccessWindowTranspose : public AccessWindowRectangle +{ +public: + using AccessWindowRectangle::AccessWindowRectangle; + bool update_window_if_needed(Window &window) const override; + bool update_padding_if_needed(const Window &window) const override; + using AccessWindowRectangle::compute_valid_region; + ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) const override; +}; +} // namespace arm_compute +#endif /*__ARM_COMPUTE_IACCESS_WINDOW_TRANSPOSE_H__*/ diff --git a/arm_compute/core/CL/CLHelpers.h b/arm_compute/core/CL/CLHelpers.h new file mode 100644 index 0000000000..26253e3f38 --- /dev/null +++ b/arm_compute/core/CL/CLHelpers.h @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLHELPERS_H__ +#define __ARM_COMPUTE_CLHELPERS_H__ + +#include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/core/Helpers.h" + +#include + +namespace arm_compute +{ +enum class DataType; +enum class GPUTarget; + +/** Enable operation operations on GPUTarget enumerations */ +template <> +struct enable_bitwise_ops +{ + static constexpr bool value = true; +}; + +/** Max vector width of an OpenCL vector */ +static constexpr const unsigned int max_cl_vector_width = 16; + +/** Translates a tensor data type to the appropriate OpenCL type. + * + * @param[in] dt @ref DataType to be translated to OpenCL type. + * + * @return The string specifying the OpenCL type to be used. + */ +std::string get_cl_type_from_data_type(const DataType &dt); + +/** Translates a given gpu device target to string. 
+ * + * @param[in] target Given gpu target. + * + * @return The string describing the target. + */ +const std::string &string_from_target(GPUTarget target); + +/** Helper function to create and return a unique_ptr pointed to a CL kernel object + * It also calls the kernel's configuration. + * + * @param[in] args All the arguments that need pass to kernel's configuration. + * + * @return A unique pointer pointed to a CL kernel object + */ +template +std::unique_ptr create_configure_kernel(T &&... args) +{ + std::unique_ptr k = arm_compute::cpp14::make_unique(); + k->configure(std::forward(args)...); + return k; +} + +/** Helper function to create and return a unique_ptr pointed to a CL kernel object + * + * @return A unique pointer pointed to a CL kernel object + */ +template +std::unique_ptr create_kernel() +{ + std::unique_ptr k = arm_compute::cpp14::make_unique(); + return k; +} + +/** Helper function to get the GPU target from CL device + * + * @param[in] device A CL device + * + * @return the GPU target + */ +GPUTarget get_target_from_device(cl::Device &device); + +/** Helper function to get the GPU arch + * + * @param[in] target GPU target + * + * @return the GPU target which shows the arch + */ +GPUTarget get_arch_from_target(GPUTarget target); +} +#endif diff --git a/arm_compute/core/CL/CLKernelLibrary.h b/arm_compute/core/CL/CLKernelLibrary.h new file mode 100644 index 0000000000..c29610c252 --- /dev/null +++ b/arm_compute/core/CL/CLKernelLibrary.h @@ -0,0 +1,248 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLKERNELLIBRARY_H__ +#define __ARM_COMPUTE_CLKERNELLIBRARY_H__ + +#include "arm_compute/core/CL/OpenCL.h" + +#include +#include +#include +#include + +namespace arm_compute +{ +/** Program class */ +class Program +{ +public: + /** Default constructor. */ + Program(); + /** Construct program from source file. + * + * @param[in] context CL context used to create the program. + * @param[in] name Program name. + * @param[in] source Program source. + */ + Program(cl::Context context, std::string name, std::string source); + /** Construct program from binary file. + * + * @param[in] context CL context used to create the program. + * @param[in] device CL device for which the programs are created. + * @param[in] name Program name. + * @param[in] binary Program binary. 
+ */ + Program(cl::Context context, cl::Device device, std::string name, std::vector binary); + /** Default Copy Constructor. */ + Program(const Program &) = default; + /** Default Move Constructor. */ + Program(Program &&) = default; + /** Default copy assignment operator. */ + Program &operator=(const Program &) = default; + /** Default move assignment operator. */ + Program &operator=(Program &&) = default; + /**Returns program name. + * + * @return Program's name. + */ + std::string name() const + { + return _name; + } + /** User-defined conversion to the underlying CL program. + * + * @return The CL program object. + */ + explicit operator cl::Program() const; + + static bool build(const cl::Program &program, const std::string &build_options = ""); + /** Build the underlying CL program. + * + * @param[in] build_options Options used to build the CL program. + * + * @return A reference to itself. + */ + cl::Program build(const std::string &build_options = "") const; + +private: + cl::Context _context; /**< Underlying CL context. */ + cl::Device _device; /**< CL device for which the programs are created. */ + bool _is_binary; /**< Create program from binary? */ + std::string _name; /**< Program name. */ + std::string _source; /**< Source code for the program. */ + std::vector _binary; /**< Binary from which to create the program. */ +}; + +/** Kernel class */ +class Kernel +{ +public: + /** Default Constructor. */ + Kernel(); + /** Default Copy Constructor. */ + Kernel(const Kernel &) = default; + /** Default Move Constructor. */ + Kernel(Kernel &&) = default; + /** Default copy assignment operator. */ + Kernel &operator=(const Kernel &) = default; + /** Default move assignment operator. */ + Kernel &operator=(Kernel &&) = default; + /** Constructor. + * + * @param[in] name Kernel name. + * @param[in] program Built program. + */ + Kernel(std::string name, const cl::Program &program); + /** Returns kernel name. + * + * @return Kernel's name. 
+ */ + std::string name() const + { + return _name; + } + /** Returns OpenCL kernel. + * + * @return OpenCL Kernel. + */ + explicit operator cl::Kernel() const + { + return _kernel; + } + +private: + std::string _name; /**< Kernel name */ + cl::Kernel _kernel; /**< OpenCL Kernel */ +}; + +/** CLKernelLibrary class */ +class CLKernelLibrary +{ + using StringSet = std::set; + +private: + /** Default Constructor. */ + CLKernelLibrary(); + +public: + /** Prevent instances of this class from being copied. */ + CLKernelLibrary(const CLKernelLibrary &) = delete; + /** Prevent instances of this class from being copied. */ + const CLKernelLibrary &operator=(const CLKernelLibrary &) = delete; + /** Access the KernelLibrary singleton. + * @return The KernelLibrary instance. + */ + static CLKernelLibrary &get(); + /** Initialises the kernel library. + * + * @param[in] kernel_path (Optional) Path of the directory from which kernel sources are loaded. + * @param[in] context (Optional) CL context used to create programs. + * @param[in] device (Optional) CL device for which the programs are created. + */ + void init(std::string kernel_path = ".", cl::Context context = cl::Context::getDefault(), cl::Device device = cl::Device::getDefault()) + { + _kernel_path = std::move(kernel_path); + _context = std::move(context); + _device = std::move(device); + } + /** Sets the path that the kernels reside in. + * + * @param[in] kernel_path Path of the kernel. + */ + void set_kernel_path(const std::string &kernel_path) + { + _kernel_path = kernel_path; + }; + /** Sets the CL context used to create programs. + * + * @note Setting the context also resets the device to the + * first one available in the new context. + * + * @param[in] context A CL context. 
+ */ + void set_context(cl::Context context) + { + _context = std::move(context); + + const auto cl_devices = _context.getInfo(); + + if(cl_devices.empty()) + { + _device = cl::Device(); + } + else + { + _device = cl_devices[0]; + } + }; + /** Sets the CL device for which the programs are created. + * + * @param[in] device A CL device. + */ + void set_device(cl::Device device) + { + _device = std::move(device); + }; + /** Creates a kernel from the kernel library. + * + * @param[in] kernel_name Kernel name. + * @param[in] build_options_set Kernel build options as a set. + * + * @return The created kernel. + */ + Kernel create_kernel(const std::string &kernel_name, const StringSet &build_options_set = {}) const; + /** Serializes and saves programs to a binary. + * + */ + void save_binary(); + /** Load serialized binary with all the programs. + * + */ + void load_binary(); + +private: + /** Load program and its dependencies. + * + * @param[in] program_name Name of the program to load. + */ + const Program &load_program(const std::string &program_name) const; + /** Concatenates contents of a set into a single string. + * + * @param[in] s Input set to concatenate. + * + * @return Concatenated string. + */ + std::string stringify_set(const StringSet &s) const; + + cl::Context _context; /**< Underlying CL context. */ + cl::Device _device; /**< Underlying CL device. */ + std::string _kernel_path; /**< Path to the kernels folder. */ + mutable std::map _programs_map; /**< Map with all already loaded program data. */ + mutable std::map _built_programs_map; /**< Map with all already built program data. */ + static const std::map _kernel_program_map; /**< Map that associates kernel names with programs. */ + static const std::map _program_source_map; /**< Contains sources for all programs. + Used for compile-time kernel inclusion. 
>*/ +}; +} +#endif /* __ARM_COMPUTE_CLKERNELLIBRARY_H__ */ diff --git a/arm_compute/core/CL/CLKernels.h b/arm_compute/core/CL/CLKernels.h new file mode 100644 index 0000000000..0e9f356e52 --- /dev/null +++ b/arm_compute/core/CL/CLKernels.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
/*
 * Copyright (c) 2016, 2017 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef __ARM_COMPUTE_CLKERNELS_H__
#define __ARM_COMPUTE_CLKERNELS_H__

/* Convenience header regrouping all the CL kernels: include this one file
 * instead of the individual kernel headers. */
#include "arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h"
#include "arm_compute/core/CL/kernels/CLAccumulateKernel.h"
#include "arm_compute/core/CL/kernels/CLActivationLayerKernel.h"
#include "arm_compute/core/CL/kernels/CLArithmeticAdditionKernel.h"
#include "arm_compute/core/CL/kernels/CLArithmeticSubtractionKernel.h"
#include "arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h"
#include "arm_compute/core/CL/kernels/CLBitwiseAndKernel.h"
#include "arm_compute/core/CL/kernels/CLBitwiseNotKernel.h"
#include "arm_compute/core/CL/kernels/CLBitwiseOrKernel.h"
#include "arm_compute/core/CL/kernels/CLBitwiseXorKernel.h"
#include "arm_compute/core/CL/kernels/CLBox3x3Kernel.h"
#include "arm_compute/core/CL/kernels/CLCannyEdgeKernel.h"
#include "arm_compute/core/CL/kernels/CLChannelCombineKernel.h"
#include "arm_compute/core/CL/kernels/CLChannelExtractKernel.h"
#include "arm_compute/core/CL/kernels/CLCol2ImKernel.h"
#include "arm_compute/core/CL/kernels/CLColorConvertKernel.h"
#include "arm_compute/core/CL/kernels/CLConvolutionKernel.h"
#include "arm_compute/core/CL/kernels/CLDepthConcatenateKernel.h"
#include "arm_compute/core/CL/kernels/CLDepthConvertKernel.h"
#include "arm_compute/core/CL/kernels/CLDerivativeKernel.h"
#include "arm_compute/core/CL/kernels/CLDilateKernel.h"
#include "arm_compute/core/CL/kernels/CLErodeKernel.h"
#include "arm_compute/core/CL/kernels/CLFastCornersKernel.h"
#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMMatrixAdditionKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h"
#include "arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h"
#include "arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h"
#include "arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h"
#include "arm_compute/core/CL/kernels/CLHarrisCornersKernel.h"
#include "arm_compute/core/CL/kernels/CLHistogramKernel.h"
#include "arm_compute/core/CL/kernels/CLIm2ColKernel.h"
#include "arm_compute/core/CL/kernels/CLIntegralImageKernel.h"
#include "arm_compute/core/CL/kernels/CLLKTrackerKernel.h"
#include "arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h"
#include "arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h"
#include "arm_compute/core/CL/kernels/CLMeanStdDevKernel.h"
#include "arm_compute/core/CL/kernels/CLMedian3x3Kernel.h"
#include "arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h"
#include "arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h"
#include "arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h"
#include "arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h"
#include "arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h"
#include "arm_compute/core/CL/kernels/CLPoolingLayerKernel.h"
#include "arm_compute/core/CL/kernels/CLRemapKernel.h"
#include "arm_compute/core/CL/kernels/CLScaleKernel.h"
#include "arm_compute/core/CL/kernels/CLScharr3x3Kernel.h"
#include "arm_compute/core/CL/kernels/CLSobel3x3Kernel.h"
#include "arm_compute/core/CL/kernels/CLSobel5x5Kernel.h"
#include "arm_compute/core/CL/kernels/CLSobel7x7Kernel.h"
#include "arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h"
#include "arm_compute/core/CL/kernels/CLTableLookupKernel.h"
#include "arm_compute/core/CL/kernels/CLThresholdKernel.h"
#include "arm_compute/core/CL/kernels/CLTransposeKernel.h"
#include "arm_compute/core/CL/kernels/CLWarpAffineKernel.h"
#include "arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h"
#include "arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h"

#endif /* __ARM_COMPUTE_CLKERNELS_H__ */
+ */ +#ifndef __ARM_COMPUTE_CL_TYPES_H__ +#define __ARM_COMPUTE_CL_TYPES_H__ + +namespace arm_compute +{ +/** Available GPU Targets */ +enum class GPUTarget +{ + GPU_ARCH_MASK = 0xF00, + MIDGARD = 0x100, + BIFROST = 0x200, + T600 = 0x110, + T700 = 0x120, + T800 = 0x130, + G70 = 0x210 +}; +} +#endif /* __ARM_COMPUTE_CL_TYPES_H__ */ diff --git a/arm_compute/core/CL/ICLArray.h b/arm_compute/core/CL/ICLArray.h new file mode 100644 index 0000000000..1b676ed5a3 --- /dev/null +++ b/arm_compute/core/CL/ICLArray.h @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_ICLARRAY_H__ +#define __ARM_COMPUTE_ICLARRAY_H__ + +#include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/core/IArray.h" +#include "arm_compute/core/ITensor.h" + +namespace arm_compute +{ +/** Interface for OpenCL Array */ +template +class ICLArray : public IArray +{ +public: + /* Constructor */ + explicit ICLArray(size_t max_num_values) + : IArray(max_num_values), _mapping(nullptr) + { + } + + ICLArray(const ICLArray &) = delete; + ICLArray &operator=(const ICLArray &) = delete; + virtual ~ICLArray() = default; + /** Interface to be implemented by the child class to return a reference to the OpenCL buffer containing the array's data. + * + * @return A reference to an OpenCL buffer containing the array's data. + */ + virtual const cl::Buffer &cl_buffer() const = 0; + /** Enqueue a map operation of the allocated buffer on the given queue. + * + * @param[in,out] q The CL command queue to use for the mapping operation. + * @param[in] blocking If true, then the mapping will be ready to use by the time + * this method returns, else it is the caller's responsibility + * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer. + * + * @return The mapping address. + */ + void map(cl::CommandQueue &q, bool blocking = true) + { + _mapping = do_map(q, blocking); + } + /** Enqueue an unmap operation of the allocated and mapped buffer on the given queue. + * + * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before + * the memory is accessed by the device. + * + * @param[in,out] q The CL command queue to use for the mapping operation. 
+ */ + void unmap(cl::CommandQueue &q) + { + do_unmap(q, _mapping); + _mapping = nullptr; + } + + // Inherited methods overridden: + T *buffer() const override + { + return reinterpret_cast(_mapping); + } + +protected: + /** Method to be implemented by the child class to map the OpenCL buffer + * + * @param[in,out] q The CL command queue to use for the mapping operation. + * @param[in] blocking If true, then the mapping will be ready to use by the time + * this method returns, else it is the caller's responsibility + * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer. + */ + virtual uint8_t *do_map(cl::CommandQueue &q, bool blocking) = 0; + /** Method to be implemented by the child class to unmap the OpenCL buffer + * + * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before + * the memory is accessed by the device. + * + * @param[in,out] q The CL command queue to use for the mapping operation. + * @param[in] mapping Pointer to the buffer to be unmapped. + */ + virtual void do_unmap(cl::CommandQueue &q, uint8_t *mapping) = 0; + +private: + uint8_t *_mapping; +}; + +using ICLKeyPointArray = ICLArray; +using ICLCoordinates2DArray = ICLArray; +using ICLDetectionWindowArray = ICLArray; +using ICLSize2DArray = ICLArray; +using ICLUInt8Array = ICLArray; +using ICLUInt16Array = ICLArray; +using ICLUInt32Array = ICLArray; +using ICLInt16Array = ICLArray; +using ICLInt32Array = ICLArray; +using ICLFloatArray = ICLArray; +} +#endif /*__ARM_COMPUTE_ICLARRAY_H__*/ diff --git a/arm_compute/core/CL/ICLDistribution1D.h b/arm_compute/core/CL/ICLDistribution1D.h new file mode 100644 index 0000000000..8fbbbbf548 --- /dev/null +++ b/arm_compute/core/CL/ICLDistribution1D.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_ICLDISTRIBUTION1D_H__ +#define __ARM_COMPUTE_ICLDISTRIBUTION1D_H__ + +#include "arm_compute/core/IDistribution1D.h" + +#include +#include + +namespace cl +{ +class Buffer; +class CommandQueue; +} + +namespace arm_compute +{ +/** ICLDistribution1D interface class */ +class ICLDistribution1D : public IDistribution1D +{ +public: + /** Constructor: Creates a 1D CLDistribution of a consecutive interval [offset, offset + range - 1] + * defined by a start offset and valid range, divided equally into num_bins parts. + * + * @param[in] num_bins The number of bins the distribution is divided in. + * @param[in] offset The start of the values to use. + * @param[in] range The total number of the consecutive values of the distribution interval. 
+ */ + ICLDistribution1D(size_t num_bins, int32_t offset, uint32_t range); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + ICLDistribution1D(const ICLDistribution1D &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + const ICLDistribution1D &operator=(const ICLDistribution1D &) = delete; + /** Enqueue a map operation of the allocated buffer on the given queue. + * + * @param[in,out] q The CL command queue to use for the mapping operation. + * @param[in] blocking If true, then the mapping will be ready to use by the time + * this method returns, else it is the caller's responsibility + * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer. + */ + void map(cl::CommandQueue &q, bool blocking = true); + /** Enqueue an unmap operation of the allocated and mapped buffer on the given queue. + * + * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before + * the memory is accessed by the device. + * + * @param[in,out] q The CL command queue to use for the mapping operation. + */ + void unmap(cl::CommandQueue &q); + /** Interface to be implemented by the child class to return a reference to the OpenCL buffer containing the distribution's data. + * + * @return A reference to an OpenCL buffer containing the distribution's data. + */ + virtual cl::Buffer &cl_buffer() = 0; + // Inherited methods overridden: + uint32_t *buffer() const override; + +protected: + /** Method to be implemented by the child class to map the OpenCL buffer + * + * @param[in,out] q The CL command queue to use for the mapping operation. 
+ * @param[in] blocking If true, then the mapping will be ready to use by the time + * this method returns, else it is the caller's responsibility + * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer. + */ + virtual uint32_t *do_map(cl::CommandQueue &q, bool blocking) = 0; + /** Method to be implemented by the child class to unmap the OpenCL buffer + * + * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before + * the memory is accessed by the device. + * + * @param[in,out] q The CL command queue to use for the mapping operation. + */ + virtual void do_unmap(cl::CommandQueue &q) = 0; + +protected: + uint32_t *_mapping; /**< The distribution data. */ +}; +} +#endif /* __ARM_COMPUTE_ICLDISTRIBUTION1D_H__ */ diff --git a/arm_compute/core/CL/ICLHOG.h b/arm_compute/core/CL/ICLHOG.h new file mode 100644 index 0000000000..a3d2fb4a57 --- /dev/null +++ b/arm_compute/core/CL/ICLHOG.h @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_ICLHOG_H__ +#define __ARM_COMPUTE_ICLHOG_H__ + +#include "arm_compute/core/IHOG.h" + +#include + +namespace cl +{ +class Buffer; +class CommandQueue; +} + +namespace arm_compute +{ +/** Interface for OpenCL HOG data-object */ +class ICLHOG : public IHOG +{ +public: + /** Default constructor */ + ICLHOG(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + ICLHOG(const ICLHOG &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + ICLHOG &operator=(const ICLHOG &) = delete; + /** Allow instances of this class to be moved */ + ICLHOG(ICLHOG &&) = default; + /** Allow instances of this class to be moved */ + ICLHOG &operator=(ICLHOG &&) = default; + /** Default destructor */ + virtual ~ICLHOG() = default; + + /** Interface to be implemented by the child class to return a reference to the OpenCL buffer containing the hog's descriptor + * + * @return A reference to an OpenCL buffer containing the hog's descriptor + */ + virtual const cl::Buffer &cl_buffer() const = 0; + + /** Enqueue a map operation of the allocated buffer on the given queue. + * + * @param[in,out] q The CL command queue to use for the mapping operation. + * @param[in] blocking If true, then the mapping will be ready to use by the time + * this method returns, else it is the caller's responsibility + * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer. + * + * @return The mapping address. + */ + void map(cl::CommandQueue &q, bool blocking = true); + + /** Enqueue an unmap operation of the allocated and mapped buffer on the given queue. 
+ * + * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before + * the memory is accessed by the device. + * + * @param[in,out] q The CL command queue to use for the mapping operation. + */ + void unmap(cl::CommandQueue &q); + + /** Interface to be implemented by the child class to free the allocated cl buffer. + * + * @warning The buffer must have been allocated previously. Otherwise calling the function will fail. + */ + virtual void free() = 0; + + // Inherited methods overridden: + float *descriptor() const override; + +protected: + /** Method to be implemented by the child class to map the OpenCL buffer + * + * @param[in,out] q The CL command queue to use for the mapping operation. + * @param[in] blocking If true, then the mapping will be ready to use by the time + * this method returns, else it is the caller's responsibility + * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer. + */ + virtual uint8_t *do_map(cl::CommandQueue &q, bool blocking) = 0; + /** Method to be implemented by the child class to unmap the OpenCL buffer + * + * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before + * the memory is accessed by the device. + * + * @param[in,out] q The CL command queue to use for the mapping operation. + */ + virtual void do_unmap(cl::CommandQueue &q) = 0; + +private: + uint8_t *_mapping; +}; +} +#endif /*__ARM_COMPUTE_ICLHOG_H__ */ diff --git a/arm_compute/core/CL/ICLKernel.h b/arm_compute/core/CL/ICLKernel.h new file mode 100644 index 0000000000..72c963d11b --- /dev/null +++ b/arm_compute/core/CL/ICLKernel.h @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_ICLKERNEL_H__ +#define __ARM_COMPUTE_ICLKERNEL_H__ + +#include "arm_compute/core/CL/CLTypes.h" +#include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/core/IKernel.h" + +namespace arm_compute +{ +class ICLTensor; +class Window; + +/** Common interface for all the OpenCL kernels */ +class ICLKernel : public IKernel +{ +public: + /** Constructor */ + ICLKernel(); + /** Returns a reference to the OpenCL kernel of this object. + * + * @return A reference to the OpenCL kernel of this object. + */ + cl::Kernel &kernel(); + /** Add the passed 1D tensor's parameters to the object's kernel's arguments starting from the index idx. + * + * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set. 
+ * @param[in] tensor Tensor to set as an argument of the object's kernel. + * @param[in] window Window the kernel will be executed on. + */ + void add_1D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window); + /** Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx. + * + * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set. + * @param[in] tensor Tensor to set as an argument of the object's kernel. + * @param[in] window Window the kernel will be executed on. + */ + void add_2D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window); + /** Add the passed 3D tensor's parameters to the object's kernel's arguments starting from the index idx. + * + * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set. + * @param[in] tensor Tensor to set as an argument of the object's kernel. + * @param[in] window Window the kernel will be executed on. + */ + void add_3D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window); + /** Returns the number of arguments enqueued per 1D tensor object. + * + * @return The number of arguments enqueues per 1D tensor object. + */ + unsigned int num_arguments_per_1D_tensor() const; + /** Returns the number of arguments enqueued per 2D tensor object. + * + * @return The number of arguments enqueues per 2D tensor object. + */ + unsigned int num_arguments_per_2D_tensor() const; + /** Returns the number of arguments enqueued per 3D tensor object. + * + * @return The number of arguments enqueues per 3D tensor object. + */ + unsigned int num_arguments_per_3D_tensor() const; + /** Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue. 
+ * + * @note The queue is *not* flushed by this method, and therefore the kernel will not have been executed by the time this method returns. + * + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). + * @param[in,out] queue Command queue on which to enqueue the kernel. + */ + virtual void run(const Window &window, cl::CommandQueue &queue) = 0; + /** Add the passed parameters to the object's kernel's arguments starting from the index idx. + * + * @param[in,out] idx Index at which to start adding the arguments. Will be incremented by the number of kernel arguments set. + * @param[in] value Value to set as an argument of the object's kernel. + */ + template + void add_argument(unsigned int &idx, T value) + { + _kernel.setArg(idx++, value); + } + + /** Set the targeted GPU architecture + * + * @param[in] target The targeted GPU architecture + */ + void set_target(GPUTarget target); + + /** Set the targeted GPU architecture according to the CL device + * + * @param[in] device A CL device + */ + void set_target(cl::Device &device); + + /** Get the targeted GPU architecture + * + * @return The targeted GPU architecture. + */ + GPUTarget get_target() const; + +private: + /** Add the passed tensor's parameters to the object's kernel's arguments starting from the index idx. + * + * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set. + * @param[in] tensor Tensor to set as an argument of the object's kernel. + * @param[in] window Window the kernel will be executed on. + */ + template + void add_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window); + /** Returns the number of arguments enqueued per tensor object. + * + * @return The number of arguments enqueued per tensor object. 
+ */ + template + unsigned int num_arguments_per_tensor() const; + +protected: + cl::Kernel _kernel; /**< OpenCL kernel to run */ + cl::NDRange _lws_hint; /**< Local workgroup size hint for the OpenCL kernel */ + GPUTarget _target; /**< The targeted GPU */ +}; + +/** Add the kernel to the command queue with the given window. + * + * @note Depending on the size of the window, this might translate into several jobs being enqueued. + * + * @note If kernel->kernel() is empty then the function will return without adding anything to the queue. + * + * @param[in,out] queue OpenCL command queue. + * @param[in] kernel Kernel to enqueue + * @param[in] window Window the kernel has to process. + * @param[in] lws_hint Local workgroup size requested, by default (128,1) + * + * @note If any dimension of the lws is greater than the global workgroup size then no lws will be passed. + */ +void enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, const cl::NDRange &lws_hint = cl::Range_128_1); +} +#endif /*__ARM_COMPUTE_ICLKERNEL_H__ */ diff --git a/arm_compute/core/CL/ICLLut.h b/arm_compute/core/CL/ICLLut.h new file mode 100644 index 0000000000..2016ebb5c3 --- /dev/null +++ b/arm_compute/core/CL/ICLLut.h @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_ICLLUT_H__ +#define __ARM_COMPUTE_ICLLUT_H__ + +#include "arm_compute/core/ILut.h" + +#include + +namespace cl +{ +class Buffer; +class CommandQueue; +} + +namespace arm_compute +{ +/** Interface for OpenCL LUT */ +class ICLLut : public ILut +{ +public: + ICLLut(); + ICLLut(const ICLLut &) = delete; + ICLLut &operator=(const ICLLut &) = delete; + + /** Interface to be implemented by the child class to return a reference to the OpenCL buffer containing the lut's data. + * + * @return A reference to an OpenCL buffer containing the lut's data. + */ + virtual const cl::Buffer &cl_buffer() const = 0; + /** Enqueue a map operation of the allocated buffer on the given queue. + * + * @param[in,out] q The CL command queue to use for the mapping operation. + * @param[in] blocking If true, then the mapping will be ready to use by the time + * this method returns, else it is the caller's responsibility + * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer. + */ + void map(cl::CommandQueue &q, bool blocking = true); + /** Enqueue an unmap operation of the allocated and mapped buffer on the given queue. + * + * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before + * the memory is accessed by the device. + * + * @param[in,out] q The CL command queue to use for the mapping operation. 
+ */ + void unmap(cl::CommandQueue &q); + + // Inherited methods overridden: + uint8_t *buffer() const override; + +protected: + /** Method to be implemented by the child class to map the OpenCL buffer + * + * @param[in,out] q The CL command queue to use for the mapping operation. + * @param[in] blocking If true, then the mapping will be ready to use by the time + * this method returns, else it is the caller's responsibility + * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer. + */ + virtual uint8_t *do_map(cl::CommandQueue &q, bool blocking) = 0; + /** Method to be implemented by the child class to unmap the OpenCL buffer + * + * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before + * the memory is accessed by the device. + * + * @param[in,out] q The CL command queue to use for the mapping operation. + */ + virtual void do_unmap(cl::CommandQueue &q) = 0; + +private: + uint8_t *_mapping; +}; +} +#endif /*__ARM_COMPUTE_ICLLUT_H__ */ diff --git a/arm_compute/core/CL/ICLMultiHOG.h b/arm_compute/core/CL/ICLMultiHOG.h new file mode 100644 index 0000000000..9f3c775230 --- /dev/null +++ b/arm_compute/core/CL/ICLMultiHOG.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_ICLMULTIHOG_H__ +#define __ARM_COMPUTE_ICLMULTIHOG_H__ + +#include "arm_compute/core/CL/ICLHOG.h" +#include "arm_compute/core/IMultiHOG.h" + +namespace arm_compute +{ +/** Interface for storing multiple HOG data-objects */ +class ICLMultiHOG : public IMultiHOG +{ +public: + /** Return a pointer to the requested OpenCL HOG model + * + * @param[in] index The index of the wanted OpenCL HOG model. + * + * @return A pointer pointed to the HOG model + */ + virtual ICLHOG *cl_model(size_t index) = 0; + /** Return a constant pointer to the requested OpenCL HOG model + * + * @param[in] index The index of the wanted OpenCL HOG model. + * + * @return A constant pointer pointed to the OpenCL HOG model + */ + virtual const ICLHOG *cl_model(size_t index) const = 0; + + // Inherited methods overridden: + IHOG *model(size_t index) override; + const IHOG *model(size_t index) const override; +}; +} +#endif /*__ARM_COMPUTE_ICLMULTIHOG_H__ */ diff --git a/arm_compute/core/CL/ICLMultiImage.h b/arm_compute/core/CL/ICLMultiImage.h new file mode 100644 index 0000000000..e8705b1824 --- /dev/null +++ b/arm_compute/core/CL/ICLMultiImage.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_ICLMULTIIMAGE_H__ +#define __ARM_COMPUTE_ICLMULTIIMAGE_H__ + +#include "arm_compute/core/IMultiImage.h" + +namespace arm_compute +{ +class ICLTensor; +using ICLImage = ICLTensor; + +/** Interface for OpenCL multi-planar images */ +class ICLMultiImage : public IMultiImage +{ +public: + /** Return a pointer to the requested OpenCL plane of the image. + * + * @param[in] index The index of the wanted planed. + * + * @return A pointer pointed to the OpenCL plane + */ + virtual ICLImage *cl_plane(unsigned int index) = 0; + /** Return a constant pointer to the requested OpenCL plane of the image. + * + * @param[in] index The index of the wanted planed. 
+ * + * @return A constant pointer pointed to the OpenCL plane + */ + virtual const ICLImage *cl_plane(unsigned int index) const = 0; + + // Inherited methods overridden: + IImage *plane(unsigned int index) override; + const IImage *plane(unsigned int index) const override; +}; +} +#endif /*__ARM_COMPUTE_ICLMULTIIMAGE_H__ */ diff --git a/arm_compute/core/CL/ICLSimple2DKernel.h b/arm_compute/core/CL/ICLSimple2DKernel.h new file mode 100644 index 0000000000..a1366fb211 --- /dev/null +++ b/arm_compute/core/CL/ICLSimple2DKernel.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_ICLSIMPLE2DKERNEL_H__ +#define __ARM_COMPUTE_ICLSIMPLE2DKERNEL_H__ + +#include "arm_compute/core/CL/ICLSimpleKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for simple OpenCL kernels having 1 tensor input and 1 tensor output. 
This interface can be used when the work-item processes a 2D tile */ +class ICLSimple2DKernel : public ICLSimpleKernel +{ +public: + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; +}; +} +#endif /*__ARM_COMPUTE_ICLSIMPLE2DKERNEL_H__ */ diff --git a/arm_compute/core/CL/ICLSimple3DKernel.h b/arm_compute/core/CL/ICLSimple3DKernel.h new file mode 100644 index 0000000000..5e981027de --- /dev/null +++ b/arm_compute/core/CL/ICLSimple3DKernel.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_ICLSIMPLE3DKERNEL_H__ +#define __ARM_COMPUTE_ICLSIMPLE3DKERNEL_H__ + +#include "arm_compute/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for simple OpenCL kernels having 1 tensor input and 1 tensor output. 
+ * Both input tensor and output tensor must have at least 3 dimensions. + */ +class ICLSimple3DKernel : public ICLSimple2DKernel +{ +public: + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; +}; +} +#endif /*__ARM_COMPUTE_ICLSIMPLE3DKERNEL_H__ */ diff --git a/arm_compute/core/CL/ICLSimpleKernel.h b/arm_compute/core/CL/ICLSimpleKernel.h new file mode 100644 index 0000000000..e9fdb7fb8b --- /dev/null +++ b/arm_compute/core/CL/ICLSimpleKernel.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_ICLSIMPLEKERNEL_H__ +#define __ARM_COMPUTE_ICLSIMPLEKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/core/Helpers.h" + +namespace arm_compute +{ +/** Interface for simple OpenCL kernels having 1 tensor input and 1 tensor output */ +class ICLSimpleKernel : public ICLKernel +{ +public: + /** Constructor. */ + ICLSimpleKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + ICLSimpleKernel(const ICLSimpleKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + ICLSimpleKernel &operator=(const ICLSimpleKernel &) = delete; + /** Allow instances of this class to be moved. */ + ICLSimpleKernel(ICLSimpleKernel &&) = default; + /** Allow instances of this class to be moved. */ + ICLSimpleKernel &operator=(ICLSimpleKernel &&) = default; + /** Default destructor */ + ~ICLSimpleKernel() = default; + + /** Configure the kernel + * + * @param[in] input Source tensor. + * @param[out] output Destination tensor. + * @param[in] num_elems_processed_per_iteration Number of processed elements per iteration. + * @param[in] border_undefined (Optional) True if the border mode is undefined. False if it's replicate or constant. + * @param[in] border_size (Optional) Size of the border. + */ + void configure(const ICLTensor *input, ICLTensor *output, unsigned int num_elems_processed_per_iteration, bool border_undefined = false, const BorderSize &border_size = BorderSize()); + +protected: + const ICLTensor *_input; + ICLTensor *_output; +}; +} + +#endif /*__ARM_COMPUTE_ICLSIMPLEKERNEL_H__ */ diff --git a/arm_compute/core/CL/ICLTensor.h b/arm_compute/core/CL/ICLTensor.h new file mode 100644 index 0000000000..abc0131379 --- /dev/null +++ b/arm_compute/core/CL/ICLTensor.h @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_ICLTENSOR_H__ +#define __ARM_COMPUTE_ICLTENSOR_H__ + +#include "arm_compute/core/ITensor.h" + +#include + +namespace cl +{ +class Buffer; +class CommandQueue; +} + +namespace arm_compute +{ +/** Interface for OpenCL tensor */ +class ICLTensor : public ITensor +{ +public: + ICLTensor(); + ICLTensor(const ICLTensor &) = delete; + ICLTensor &operator=(const ICLTensor &) = delete; + ICLTensor(ICLTensor &&) = default; + ICLTensor &operator=(ICLTensor &&) = default; + virtual ~ICLTensor() = default; + + /** Interface to be implemented by the child class to return a reference to the OpenCL buffer containing the image's data. + * + * @return A reference to an OpenCL buffer containing the image's data. + */ + virtual const cl::Buffer &cl_buffer() const = 0; + /** Enqueue a map operation of the allocated buffer on the given queue. 
+ * + * @param[in,out] q The CL command queue to use for the mapping operation. + * @param[in] blocking If true, then the mapping will be ready to use by the time + * this method returns, else it is the caller's responsibility + * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer. + * + * @return The mapping address. + */ + void map(cl::CommandQueue &q, bool blocking = true); + /** Enqueue an unmap operation of the allocated and mapped buffer on the given queue. + * + * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before + * the memory is accessed by the device. + * + * @param[in,out] q The CL command queue to use for the mapping operation. + */ + void unmap(cl::CommandQueue &q); + /** Clear the contents of the tensor synchronously. + * + * @param[in,out] q The CL command queue to use for the clear operation. + */ + void clear(cl::CommandQueue &q); + + // Inherited methods overridden: + uint8_t *buffer() const override; + +protected: + /** Method to be implemented by the child class to map the OpenCL buffer + * + * @param[in,out] q The CL command queue to use for the mapping operation. + * @param[in] blocking If true, then the mapping will be ready to use by the time + * this method returns, else it is the caller's responsibility + * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer. + */ + virtual uint8_t *do_map(cl::CommandQueue &q, bool blocking) = 0; + /** Method to be implemented by the child class to unmap the OpenCL buffer + * + * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before + * the memory is accessed by the device. + * + * @param[in,out] q The CL command queue to use for the mapping operation. 
+ */ + virtual void do_unmap(cl::CommandQueue &q) = 0; + +private: + uint8_t *_mapping; +}; + +using ICLImage = ICLTensor; +} +#endif /*__ARM_COMPUTE_ICLTENSOR_H__ */ diff --git a/arm_compute/core/CL/OpenCL.h b/arm_compute/core/CL/OpenCL.h new file mode 100644 index 0000000000..2fae35c974 --- /dev/null +++ b/arm_compute/core/CL/OpenCL.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_OPENCL_H__ +#define __ARM_COMPUTE_OPENCL_H__ + +/* Configure the Khronos C++ wrapper to target OpenCL 1.2: */ +#define CL_HPP_ENABLE_EXCEPTIONS +#define CL_HPP_CL_1_2_DEFAULT_BUILD +#define CL_HPP_TARGET_OPENCL_VERSION 110 +#define CL_HPP_MINIMUM_OPENCL_VERSION 110 +#include + +namespace cl +{ +static const NDRange Range_128_1 = NDRange(128, 1); +} + +namespace arm_compute +{ +bool opencl_is_available(); +} +#endif /* __ARM_COMPUTE_OPENCL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h b/arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h new file mode 100644 index 0000000000..e8bd6aac7f --- /dev/null +++ b/arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLABSOLUTEDIFFERENCEKERNEL_H__ +#define __ARM_COMPUTE_CLABSOLUTEDIFFERENCEKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the absolute difference kernel. + * + * Absolute difference is computed by: + * @f[ output(x,y) = | input1(x,y) - input2(x,y) | @f] + */ +class CLAbsoluteDifferenceKernel : public ICLKernel +{ +public: + /** Default constructor. */ + CLAbsoluteDifferenceKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLAbsoluteDifferenceKernel(const CLAbsoluteDifferenceKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLAbsoluteDifferenceKernel &operator=(const CLAbsoluteDifferenceKernel &) = delete; + /** Allow instances of this class to be moved. */ + CLAbsoluteDifferenceKernel(CLAbsoluteDifferenceKernel &&) = default; + /** Allow instances of this class to be moved. */ + CLAbsoluteDifferenceKernel &operator=(CLAbsoluteDifferenceKernel &&) = default; + /** Default destructor */ + ~CLAbsoluteDifferenceKernel() = default; + + /** Set the inputs and output images. + * + * @param[in] input1 Source tensor. Data types supported: U8/S16. + * @param[in] input2 Source tensor. Data types supported: U8/S16. + * @param[out] output Destination tensor. Data types supported: U8/S16. + */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input1; /**< Source tensor 1. */ + const ICLTensor *_input2; /**< Source tensor 2. */ + ICLTensor *_output; /**< Destination tensor. 
*/ +}; +} +#endif /* __ARM_COMPUTE_CLABSOLUTEDIFFERENCEKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLAccumulateKernel.h b/arm_compute/core/CL/kernels/CLAccumulateKernel.h new file mode 100644 index 0000000000..5c8ffdb404 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLAccumulateKernel.h @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLACCUMULATEKERNEL_H__ +#define __ARM_COMPUTE_CLACCUMULATEKERNEL_H__ + +#include "arm_compute/core/CL/ICLSimple2DKernel.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the accumulate kernel. + * + * Accumulation is computed by: + * @f[ accum(x,y) = accum(x,y) + input(x,y) @f] + */ +class CLAccumulateKernel : public ICLSimple2DKernel +{ +public: + /** Set the input and accumulation tensors. + * + * @param[in] input Source tensor. 
Data types supported: U8. + * @param[out] accum Destination tensor. Data types supported: S16. + */ + void configure(const ICLTensor *input, ICLTensor *accum); +}; + +/** Interface for the accumulate weighted kernel. + * + * Weighted accumulation is computed: + * @f[ accum(x,y) = (1 - \alpha)*accum(x,y) + \alpha*input(x,y) @f] + * + * Where @f$ 0 \le \alpha \le 1 @f$ + * Conceptually, the rounding for this is defined as: + * @f[ output(x,y)= uint8( (1 - \alpha) * float32( int32( output(x,y) ) ) + \alpha * float32( int32( input(x,y) ) ) ) @f] +*/ +class CLAccumulateWeightedKernel : public ICLSimple2DKernel +{ +public: + /** Set the input and accumulation images, and the scale value. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[in] alpha Scalar value in the range [0, 1.0]. Data types supported: F32. + * @param[in,out] accum Accumulated tensor. Data types supported: U8. + */ + void configure(const ICLTensor *input, float alpha, ICLTensor *accum); +}; + +/** Interface for the accumulate squared kernel. + * + * The accumulation of squares is computed: + * @f[ accum(x,y) = saturate_{int16} ( (uint16) accum(x,y) + (((uint16)(input(x,y)^2)) >> (shift)) ) @f] + * + * Where @f$ 0 \le shift \le 15 @f$ +*/ +class CLAccumulateSquaredKernel : public ICLSimple2DKernel +{ +public: + /** Set the input and accumulation tensors and the shift value. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[in] shift Shift value in the range of [0, 15]. Data types supported: U32. + * @param[in,out] accum Accumulated tensor. Data types supported: S16. 
+ */ + void configure(const ICLTensor *input, uint32_t shift, ICLTensor *accum); +}; +} +#endif /*__ARM_COMPUTE_CLACCUMULATEKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLActivationLayerKernel.h b/arm_compute/core/CL/kernels/CLActivationLayerKernel.h new file mode 100644 index 0000000000..490e70544b --- /dev/null +++ b/arm_compute/core/CL/kernels/CLActivationLayerKernel.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLACTIVATIONLAYERKERNEL_H__ +#define __ARM_COMPUTE_CLACTIVATIONLAYERKERNEL_H__ + +#include "arm_compute/core/CL/ICLSimple3DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the activation layer kernel. */ +class CLActivationLayerKernel : public ICLSimple3DKernel +{ +public: + /** Set the input and output tensor. + * + * @param[in] input Source tensor. 
Data types supported: F16, F32, U16, S16. + * @param[out] output Destination tensor. Data type should match the input data type. + * @param[in] act_info Activation layer information. + */ + void configure(const ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info); +}; +} +#endif /*__ARM_COMPUTE_CLACTIVATIONLAYERKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLArithmeticAdditionKernel.h b/arm_compute/core/CL/kernels/CLArithmeticAdditionKernel.h new file mode 100644 index 0000000000..7d736cdf44 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLArithmeticAdditionKernel.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLARITHMETICADDITIONKERNEL_H__ +#define __ARM_COMPUTE_CLARITHMETICADDITIONKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the arithmetic addition kernel + * + * Arithmetic addition is computed by: + * @f[ output(x,y) = input1(x,y) + input2(x,y) @f] + */ +class CLArithmeticAdditionKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLArithmeticAdditionKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLArithmeticAdditionKernel(const CLArithmeticAdditionKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLArithmeticAdditionKernel &operator=(const CLArithmeticAdditionKernel &) = delete; + /** Allow instances of this class to be moved */ + CLArithmeticAdditionKernel(CLArithmeticAdditionKernel &&) = default; + /** Allow instances of this class to be moved */ + CLArithmeticAdditionKernel &operator=(CLArithmeticAdditionKernel &&) = default; + /** Default destructor */ + ~CLArithmeticAdditionKernel() = default; + /** Initialise the kernel's inputs, output and convertion policy. + * + * @param[in] input1 First tensor input. Data types supported: U8, S16, F16, F32. + * @param[in] input2 Second tensor input. Data types supported: U8, S16, F16, F32. + * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), S16, F16, F32. + * @param[in] policy Policy to use to handle overflow. 
+ */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ConvertPolicy policy); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input1; /**< Source tensor 1 */ + const ICLTensor *_input2; /**< Source tensor 2 */ + ICLTensor *_output; /**< Destination tensor */ +}; +} +#endif /* __ARM_COMPUTE_CLARITHMETICADDITIONKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLArithmeticSubtractionKernel.h b/arm_compute/core/CL/kernels/CLArithmeticSubtractionKernel.h new file mode 100644 index 0000000000..afecf6ed7d --- /dev/null +++ b/arm_compute/core/CL/kernels/CLArithmeticSubtractionKernel.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLARITHMETICSUBTRACTIONKERNEL_H__ +#define __ARM_COMPUTE_CLARITHMETICSUBTRACTIONKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the arithmetic subtraction kernel + * + * Arithmetic subtraction is computed by: + * @f[ output(x,y) = input1(x,y) - input2(x,y) @f] + */ +class CLArithmeticSubtractionKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLArithmeticSubtractionKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLArithmeticSubtractionKernel(const CLArithmeticSubtractionKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLArithmeticSubtractionKernel &operator=(const CLArithmeticSubtractionKernel &) = delete; + /** Allow instances of this class to be moved */ + CLArithmeticSubtractionKernel(CLArithmeticSubtractionKernel &&) = default; + /** Allow instances of this class to be moved */ + CLArithmeticSubtractionKernel &operator=(CLArithmeticSubtractionKernel &&) = default; + /** Default destructor */ + ~CLArithmeticSubtractionKernel() = default; + + /** Initialise the kernel's inputs, output and convertion policy. + * + * @param[in] input1 First tensor input. Data types supported: U8, S16, F16, F32. + * @param[in] input2 Second tensor input. Data types supported: U8, S16, F16, F32. + * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), S16, F16, F32. + * @param[in] policy Policy to use to handle overflow. 
+ */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ConvertPolicy policy); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input1; /**< Source tensor 1 */ + const ICLTensor *_input2; /**< Source tensor 2 */ + ICLTensor *_output; /**< Destination tensor */ +}; +} +#endif /* __ARM_COMPUTE_CLARITHMETICSUBTRACTIONKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h b/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h new file mode 100644 index 0000000000..088853841b --- /dev/null +++ b/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLBATCHNORMALIZATIONLAYERKERNEL_H__ +#define __ARM_COMPUTE_CLBATCHNORMALIZATIONLAYERKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the BatchNormalization layer kernel. + */ +class CLBatchNormalizationLayerKernel : public ICLKernel +{ +public: + /** Constructor */ + CLBatchNormalizationLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLBatchNormalizationLayerKernel(const CLBatchNormalizationLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLBatchNormalizationLayerKernel &operator=(const CLBatchNormalizationLayerKernel &) = delete; + /** Default Move Constructor. */ + CLBatchNormalizationLayerKernel(CLBatchNormalizationLayerKernel &&) = default; + /** Default move assignment operator. */ + CLBatchNormalizationLayerKernel &operator=(CLBatchNormalizationLayerKernel &&) = default; + /** Default destructor */ + ~CLBatchNormalizationLayerKernel() = default; + + /** Set the input and output tensors. + * + * @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, FM]. Data types supported: F32. + * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input + * The rest are optional and used for representing batches. + * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] gamma Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] beta Beta values tensor. 1 dimension with size equal to the feature maps [FM]. 
Data types supported: Same as @p input + * @param[in] epsilon Small value to avoid division with zero. + */ + void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta, const ICLTensor *gamma, float epsilon); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + const ICLTensor *_mean; + const ICLTensor *_var; + const ICLTensor *_beta; + const ICLTensor *_gamma; + float _epsilon; +}; +} +#endif /*__ARM_COMPUTE_CLBATCHNORMALIZATIONLAYERKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLBitwiseAndKernel.h b/arm_compute/core/CL/kernels/CLBitwiseAndKernel.h new file mode 100644 index 0000000000..624c422abc --- /dev/null +++ b/arm_compute/core/CL/kernels/CLBitwiseAndKernel.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLBITWISEANDKERNEL_H__ +#define __ARM_COMPUTE_CLBITWISEANDKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the bitwise AND operation kernel. + * + * Result is computed by: + * @f[ output(x,y) = input1(x,y) \land input2(x,y) @f] + */ +class CLBitwiseAndKernel : public ICLKernel +{ +public: + /** Default constructor. */ + CLBitwiseAndKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLBitwiseAndKernel(const CLBitwiseAndKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLBitwiseAndKernel &operator=(const CLBitwiseAndKernel &) = delete; + /** Allow instances of this class to be moved */ + CLBitwiseAndKernel(CLBitwiseAndKernel &&) = default; + /** Allow instances of this class to be moved */ + CLBitwiseAndKernel &operator=(CLBitwiseAndKernel &&) = default; + /** Set the inputs and output images + * + * @param[in] input1 Source tensor. Data types supported: U8. + * @param[in] input2 Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: U8. 
+ */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input1; /**< Source tensor 1 */ + const ICLTensor *_input2; /**< Source tensor 2 */ + ICLTensor *_output; /**< Destination tensor */ +}; +} +#endif /* __ARM_COMPUTE_CLBITWISEANDKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLBitwiseNotKernel.h b/arm_compute/core/CL/kernels/CLBitwiseNotKernel.h new file mode 100644 index 0000000000..c9026022e1 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLBitwiseNotKernel.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLBITWISENOTKERNEL_H__ +#define __ARM_COMPUTE_CLBITWISENOTKERNEL_H__ + +#include "arm_compute/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the bitwise NOT operation kernel. + * + * Result is computed by: + * @f[ output(x,y) = \lnot input(x,y) @f] + */ +class CLBitwiseNotKernel : public ICLSimple2DKernel +{ +public: + /** Set the inputs and output images. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: U8. + */ + void configure(const ICLTensor *input, ICLTensor *output); +}; +} +#endif /* __ARM_COMPUTE_CLBITWISENOTKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLBitwiseOrKernel.h b/arm_compute/core/CL/kernels/CLBitwiseOrKernel.h new file mode 100644 index 0000000000..fe8710fbc1 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLBitwiseOrKernel.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLBITWISEORKERNEL_H__ +#define __ARM_COMPUTE_CLBITWISEORKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the bitwise OR operation kernel. + * + * Result is computed by: + * @f[ output(x,y) = input1(x,y) \lor input2(x,y) @f] + */ +class CLBitwiseOrKernel : public ICLKernel +{ +public: + /** Default constructor. */ + CLBitwiseOrKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLBitwiseOrKernel(const CLBitwiseOrKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLBitwiseOrKernel &operator=(const CLBitwiseOrKernel &) = delete; + /** Allow instances of this class to be moved */ + CLBitwiseOrKernel(CLBitwiseOrKernel &&) = default; + /** Allow instances of this class to be moved */ + CLBitwiseOrKernel &operator=(CLBitwiseOrKernel &&) = default; + /** Set the inputs and output images + * + * @param[in] input1 Source tensor. Data types supported: U8. + * @param[in] input2 Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: U8. 
+ */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input1; /**< Source tensor 1 */ + const ICLTensor *_input2; /**< Source tensor 2 */ + ICLTensor *_output; /**< Destination tensor */ +}; +} +#endif /* __ARM_COMPUTE_CLBITWISEORKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLBitwiseXorKernel.h b/arm_compute/core/CL/kernels/CLBitwiseXorKernel.h new file mode 100644 index 0000000000..f4e0b4df60 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLBitwiseXorKernel.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLBITWISEXORKERNEL_H__ +#define __ARM_COMPUTE_CLBITWISEXORKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the bitwise XOR operation kernel. + * + * Result is computed by: + * @f[ output(x,y) = input1(x,y) \oplus input2(x,y) @f] + */ +class CLBitwiseXorKernel : public ICLKernel +{ +public: + /** Default constructor. */ + CLBitwiseXorKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLBitwiseXorKernel(const CLBitwiseXorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLBitwiseXorKernel &operator=(const CLBitwiseXorKernel &) = delete; + /** Allow instances of this class to be moved */ + CLBitwiseXorKernel(CLBitwiseXorKernel &&) = default; + /** Allow instances of this class to be moved */ + CLBitwiseXorKernel &operator=(CLBitwiseXorKernel &&) = default; + /** Set the inputs and output images + * + * @param[in] input1 Source tensor. Data types supported: U8. + * @param[in] input2 Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: U8. + */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input1; /**< Source tensor 1 */ + const ICLTensor *_input2; /**< Source tensor 2 */ + ICLTensor *_output; /**< Destination tensor */ +}; +} +#endif /* __ARM_COMPUTE_CLBITWISEXORKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLBox3x3Kernel.h b/arm_compute/core/CL/kernels/CLBox3x3Kernel.h new file mode 100644 index 0000000000..0960f7487a --- /dev/null +++ b/arm_compute/core/CL/kernels/CLBox3x3Kernel.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLBOX3X3KERNEL_H__ +#define __ARM_COMPUTE_CLBOX3X3KERNEL_H__ + +#include "arm_compute/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the box 3x3 filter kernel. + * + */ +class CLBox3x3Kernel : public ICLSimple2DKernel +{ +public: + /**Initialise the kernel's input and output. + * + * @param[in] input An input tensor. Data types supported: U8 + * @param[out] output The output tensor. Data types supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */ + void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); + + //Inherited methods overriden: + BorderSize border_size() const override; +}; +} +#endif /*__ARM_COMPUTE_CLBOX3X3KERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLCannyEdgeKernel.h b/arm_compute/core/CL/kernels/CLCannyEdgeKernel.h new file mode 100644 index 0000000000..5ca3e03412 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLCannyEdgeKernel.h @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLCANNYEDGEKERNEL_H__ +#define __ARM_COMPUTE_CLCANNYEDGEKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to perform Gradient computation. 
+ */ +class CLGradientKernel : public ICLKernel +{ +public: + /** Constructor */ + CLGradientKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLGradientKernel(const CLGradientKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLGradientKernel &operator=(const CLGradientKernel &) = delete; + /** Initialise the kernel's sources, destinations and border mode. + * + * @note gx, gy and mag must all be the same size (either 16 or 32). + * + * @param[in] gx Source tensor - Gx component. Data types supported: S16/S32. + * @param[in] gy Source tensor - Gy component. Data types supported: Same as gx. + * @param[out] magnitude Destination tensor - Magnitude. Data types supported: U16/U32. Must match the pixel size of gx, gy. + * @param[out] phase Destination tensor - Quantized phase. Data types supported: U8. + * @param[in] norm_type Normalization type. if 1, L1-Norm otherwise L2-Norm. + */ + void configure(const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, int32_t norm_type); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_gx; /**< Source tensor - Gx component */ + const ICLTensor *_gy; /**< Source tensor - Gy component */ + ICLTensor *_magnitude; /**< Destination tensor - Magnitude */ + ICLTensor *_phase; /**< Destination tensor - Quantized phase */ +}; + +/** OpenCL kernel to perform Non-Maxima suppression for Canny Edge. + * + * @note This kernel is meant to be used alongside CannyEdge and performs a non-maxima suppression using magnitude and phase of input + * to characterize points as possible edges. The output buffer needs to be cleared before this kernel is executed. 
+ * + * @note Hysteresis is computed in @ref CLEdgeTraceKernel + */ +class CLEdgeNonMaxSuppressionKernel : public ICLKernel +{ +public: + /** Constructor */ + CLEdgeNonMaxSuppressionKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLEdgeNonMaxSuppressionKernel(const CLEdgeNonMaxSuppressionKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLEdgeNonMaxSuppressionKernel &operator=(const CLEdgeNonMaxSuppressionKernel &) = delete; + /** Initialise the kernel's sources, destination and border mode. + * + * @param[in] magnitude Source tensor - Magnitude. Data types supported: U16/U32. + * @param[in] phase Source tensor - Quantized phase. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: U16/U32. + * @param[in] lower_thr Lower threshold. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *magnitude, const ICLTensor *phase, ICLTensor *output, int32_t lower_thr, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_magnitude; /**< Source tensor - Magnitude. */ + const ICLTensor *_phase; /**< Source tensor - Quantized phase. */ + ICLTensor *_output; /**< Destination tensor. */ +}; + +/** OpenCL kernel to perform Edge tracing. 
+ */ +class CLEdgeTraceKernel : public ICLKernel +{ +public: + /** Constructor */ + CLEdgeTraceKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLEdgeTraceKernel(const CLEdgeTraceKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLEdgeTraceKernel &operator=(const CLEdgeTraceKernel &) = delete; + /** Initialise the kernel's source, destination and border mode. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: U8. + * @param[in] upper_thr Upper threshold used for the hysteresis + * @param[in] lower_thr Lower threshold used for the hysteresis + * @param[in,out] visited Tensor for keeping the visited pixels. Data types supported: U32. + * Expected to be initialized to 0 before each run. + * @param[in,out] recorded Tensor for keeping the recorded pixels. Data types supported: U32 + * Expected to be initialized to 0 before each run. + * @param[in,out] l1_stack Tensor with the L1 stack for each pixel. Data types supported: S32. + * Expected to be initialized to 0 before each run. + * @param[in,out] l1_stack_counter Tensor for counting the elements in the L1 stack of each pixel. Data types supported: U8. + * Expected to be initialized to 0 before each run. + */ + void configure(const ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr, + ICLTensor *visited, ICLTensor *recorded, ICLTensor *l1_stack, ICLTensor *l1_stack_counter); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; /**< Source tensor. */ + ICLTensor *_output; /**< Destination tensor. */ + int32_t _lower_thr; /**< Lower threshold used for the hysteresis. */ + int32_t _upper_thr; /**< Upper threshold used for the hysteresis. 
*/ + ICLTensor *_visited; /**< Marks visited elements */ + ICLTensor *_recorded; /**< Marks recorded elements */ + ICLTensor *_l1_stack; /**< L1 hysteris stack */ + ICLTensor *_l1_stack_counter; /**< L1 hysteris stack counter */ +}; +} +#endif /* __ARM_COMPUTE_CLCANNYEDGEKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLChannelCombineKernel.h b/arm_compute/core/CL/kernels/CLChannelCombineKernel.h new file mode 100644 index 0000000000..3e718a2f1a --- /dev/null +++ b/arm_compute/core/CL/kernels/CLChannelCombineKernel.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLCHANNELCOMBINEKERNEL_H__ +#define __ARM_COMPUTE_CLCHANNELCOMBINEKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +#include +#include + +namespace arm_compute +{ +class ICLMultiImage; +class ICLTensor; +using ICLImage = ICLTensor; + +/** Interface for the channel combine kernel */ +class CLChannelCombineKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLChannelCombineKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLChannelCombineKernel(const CLChannelCombineKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLChannelCombineKernel &operator=(const CLChannelCombineKernel &) = delete; + /** Allow instances of this class to be moved */ + CLChannelCombineKernel(CLChannelCombineKernel &&) = default; + /** Allow instances of this class to be moved */ + CLChannelCombineKernel &operator=(CLChannelCombineKernel &&) = default; + /** Default destructor */ + ~CLChannelCombineKernel() = default; + /** Configure function's inputs and outputs. + * + * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format. + * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format. + * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format. + * @param[in] plane3 The 2D plane that forms channel 3. Must be of U8 format. + * @param[out] output The single planar output tensor. + */ + void configure(const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output); + /** Configure function's inputs and outputs. + * + * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format. + * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format. + * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format. + * @param[out] output The multi planar output tensor. 
+ */ + void configure(const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + std::array _planes; + ICLTensor *_output; + ICLMultiImage *_output_multi; + std::array _x_subsampling; + std::array _y_subsampling; +}; +} +#endif /* __ARM_COMPUTE_CLCHANNELCOMBINEKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLChannelExtractKernel.h b/arm_compute/core/CL/kernels/CLChannelExtractKernel.h new file mode 100644 index 0000000000..3e9e699a50 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLChannelExtractKernel.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLCHANNELEXTRACTKERNEL_H__ +#define __ARM_COMPUTE_CLCHANNELEXTRACTKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/core/Types.h" + +#include + +namespace arm_compute +{ +class ICLMultiImage; +class ICLTensor; +using ICLImage = ICLTensor; + +/** Interface for the channel extract kernel */ +class CLChannelExtractKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLChannelExtractKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLChannelExtractKernel(const CLChannelExtractKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLChannelExtractKernel &operator=(const CLChannelExtractKernel &) = delete; + /** Allow instances of this class to be moved */ + CLChannelExtractKernel(CLChannelExtractKernel &&) = default; + /** Allow instances of this class to be moved */ + CLChannelExtractKernel &operator=(CLChannelExtractKernel &&) = default; + /** Default destructor */ + ~CLChannelExtractKernel() = default; + /** Set the input and output of the kernel + * + * @param[in] input Source tensor. + * @param[in] channel Channel to extract. + * @param[out] output Destination tensor. Must be of U8 format. + */ + void configure(const ICLTensor *input, Channel channel, ICLTensor *output); + /** Set the input and output of the kernel + * + * @param[in] input Multi-planar source image. + * @param[in] channel Channel to extract. + * @param[out] output Single-planar 2D destination image. Must be of U8 format. 
+ */ + void configure(const ICLMultiImage *input, Channel channel, ICLImage *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + uint32_t _num_elems_processed_per_iteration; + uint32_t _subsampling; +}; +} +#endif /* __ARM_COMPUTE_CLCHANNELEXTRACTKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLCol2ImKernel.h b/arm_compute/core/CL/kernels/CLCol2ImKernel.h new file mode 100644 index 0000000000..9d445e3004 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLCol2ImKernel.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLCOL2IMKERNEL_H__ +#define __ARM_COMPUTE_CLCOL2IMKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the col2im reshaping kernel. + * + * Rearranges each matrix column into image blocks. It's the inverse operation of @ref CLIm2ColKernel. + * + * For example, a vector of 9 elements can be reshaped to a block(image) of 3x3: + * + * @f[ + * \left( \begin{array}{ccccccccc} + * a0 & a1 & a2 & a3 & a4 & a5 & a6 & a7 & a8 \\ + * \end{array} \right) + * \rightarrow + * \left( \begin{array}{ccc} + * a0 & a1 & a2 \\ + * a3 & a4 & a5 \\ + * a6 & a7 & a8 \\ + * \end{array} \right) + * @f] + */ +class CLCol2ImKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLCol2ImKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLCol2ImKernel(const CLCol2ImKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLCol2ImKernel &operator=(const CLCol2ImKernel &) = delete; + /** Allow instances of this class to be moved */ + CLCol2ImKernel(CLCol2ImKernel &&) = default; + /** Allow instances of this class to be moved */ + CLCol2ImKernel &operator=(CLCol2ImKernel &&) = default; + /** Default destructor */ + ~CLCol2ImKernel() = default; + + /** Set the input and output of the kernel. + * + * @param[in] input The input tensor to convert. Data types supported: F16, F32 + * @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM], + * while the rest represent batch of outputs. Data types supported: Same as @p input + * @param[in] convolved_dims Output convolved dimensions. 
+ */ + void configure(const ICLTensor *input, ICLTensor *output, std::pair convolved_dims); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + std::pair _convolved_dims; +}; +} + +#endif /*__ARM_COMPUTE_CLCOL2IMKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLColorConvertKernel.h b/arm_compute/core/CL/kernels/CLColorConvertKernel.h new file mode 100644 index 0000000000..a88e2dcdf3 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLColorConvertKernel.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLCOLORCONVERTKERNEL_H__ +#define __ARM_COMPUTE_CLCOLORCONVERTKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLMultiImage; +class ICLTensor; +using ICLImage = ICLTensor; + +/** Interface for the color convert kernel. + * + */ +class CLColorConvertKernel : public ICLKernel +{ +public: + /** Default constructor. */ + CLColorConvertKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLColorConvertKernel(const CLColorConvertKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLColorConvertKernel &operator=(const CLColorConvertKernel &) = delete; + /** Allow instances of this class to be moved */ + CLColorConvertKernel(CLColorConvertKernel &&) = default; + /** Allow instances of this class to be moved */ + CLColorConvertKernel &operator=(CLColorConvertKernel &&) = default; + /** Default destructor. */ + ~CLColorConvertKernel() = default; + + /** Set the input and output of the kernel + * + * @param[in] input Source tensor + * @param[out] output Destination tensor + */ + void configure(const ICLTensor *input, ICLTensor *output); + /** Set the input and output of the kernel + * + * @param[in] input multi-planar source image + * @param[out] output single-planar destination image + */ + void configure(const ICLMultiImage *input, ICLImage *output); + /** Set the input and output of the kernel + * + * @param[in] input single-planar source image + * @param[out] output multi-planar destination image + */ + void configure(const ICLImage *input, ICLMultiImage *output); + /** Set the input and output of the kernel + * + * @param[in] input multi-planar source image + * @param[out] output multi-planar destination image + */ + void configure(const ICLMultiImage *input, ICLMultiImage *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + 
+private: + const ICLTensor *_input; /*pointer to single planar tensor input */ + ICLTensor *_output; /*pointer to single planar tensor output */ + const ICLMultiImage *_multi_input; /*pointer to multi-planar input */ + ICLMultiImage *_multi_output; /*pointer to multi-planar output */ +}; +} + +#endif /* __ARM_COMPUTE_CLCOLORCONVERTKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLConvolutionKernel.h b/arm_compute/core/CL/kernels/CLConvolutionKernel.h new file mode 100644 index 0000000000..9c0908405a --- /dev/null +++ b/arm_compute/core/CL/kernels/CLConvolutionKernel.h @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLCONVOLUTIONKERNEL_H__ +#define __ARM_COMPUTE_CLCONVOLUTIONKERNEL_H__ + +#include "arm_compute/core/CL/ICLSimple2DKernel.h" + +#include <cstdint> + +namespace arm_compute +{ +class ICLTensor; + +/****************************************************************************************\ + * Square Convolution * +\****************************************************************************************/ + +/** Interface for the kernel to run an arbitrary size convolution on a tensor. (Currently supports 3x3, 5x5, 7x7 and 9x9). + * The client can supply a convolution matrix \f$ C_{m,n} \f$. + * @f{eqnarray}{ + * k_0 &=& \frac{m}{2} \\ + * l_0 &=& \frac{n}{2} \\ + * sum &=& \sum_{k=0,l=0}^{k=m-1,l=n-1} input(x+k-k_0, y+l-l_0) C_{k,l} + * @f} + * + * @note The above equation for this function is similar to the default OpenCV Filter2D function, + * which actually computes a correlation and not a convolution. + * In case of a real convolution the convolution matrix should be flipped both horizontally and vertically. + */ +template <unsigned int matrix_size> +class CLConvolutionKernel : public ICLSimple2DKernel +{ +public: + /** Initialise the kernel's input, output and border mode. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor, Data types supported: U8, S16. + * @param[in] conv Convolution matrix to apply to the input tensor. + * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined); + + // Inherited methods overridden: + BorderSize border_size() const override; +}; + +/** Interface for the kernel which applies a 3x3 convolution to a tensor.
*/ +using CLConvolution3x3Kernel = CLConvolutionKernel<3>; +/** Interface for the kernel which applies a 5x5 convolution to a tensor. */ +using CLConvolution5x5Kernel = CLConvolutionKernel<5>; +/** Interface for the kernel which applies a 7x7 convolution to a tensor. */ +using CLConvolution7x7Kernel = CLConvolutionKernel<7>; +/** Interface for the kernel which applies a 9x9 convolution to a tensor. */ +using CLConvolution9x9Kernel = CLConvolutionKernel<9>; + +/****************************************************************************************\ + * Separable Square Convolution * +\****************************************************************************************/ + +/** Kernel for the Horizontal pass of a Separable Convolution. Currently supports 5x5, 7x7, 9x9 */ +template <unsigned int matrix_size> +class CLSeparableConvolutionHorKernel : public ICLSimple2DKernel +{ +public: + /** Default Constructor */ + CLSeparableConvolutionHorKernel(); + /** Initialise the kernel's input, output and border mode. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor, Data types supported: S16. + * @param[in] conv Convolution matrix to apply to the input tensor. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, bool border_undefined); + + // Inherited methods overridden: + BorderSize border_size() const override; + +private: + BorderSize _border_size; /**< Border size */ +}; + +/** Interface for the kernel which applies a horizontal pass of 5x5 convolution to a tensor. */ +using CLSeparableConvolution5x5HorKernel = CLSeparableConvolutionHorKernel<5>; +/** Interface for the kernel which applies a horizontal pass of 7x7 convolution to a tensor.
*/ +using CLSeparableConvolution7x7HorKernel = CLSeparableConvolutionHorKernel<7>; +/** Interface for the kernel which applies a horizontal pass of 9x9 convolution to a tensor. */ +using CLSeparableConvolution9x9HorKernel = CLSeparableConvolutionHorKernel<9>; + +/** Kernel for the Vertical pass of a Separable Convolution. Currently supports 5x5, 7x7, 9x9 */ +template <unsigned int matrix_size> +class CLSeparableConvolutionVertKernel : public ICLSimple2DKernel +{ +public: + /** Initialise the kernel's input, output and border mode. + * + * @param[in] input Source tensor. Data types supported: S16. + * @param[out] output Destination tensor, Data types supported: U8, S16. + * @param[in] conv Convolution matrix to apply to the input tensor. + * @param[in] scale Scale of the convolution matrix. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + * @param[in] data_type Data type to use for intermediate result. @sa data_type_for_convolution + */ + void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined, DataType data_type = DataType::S32); + + // Inherited methods overridden: + BorderSize border_size() const override; +}; + +/** Interface for the kernel which applies a vertical pass of 5x5 convolution to a tensor. */ +using CLSeparableConvolution5x5VertKernel = CLSeparableConvolutionVertKernel<5>; +/** Interface for the kernel which applies a vertical pass of 7x7 convolution to a tensor. */ +using CLSeparableConvolution7x7VertKernel = CLSeparableConvolutionVertKernel<7>; +/** Interface for the kernel which applies a vertical pass of 9x9 convolution to a tensor.
*/ +using CLSeparableConvolution9x9VertKernel = CLSeparableConvolutionVertKernel<9>; + +/****************************************************************************************\ + * Rectangle Convolution * +\****************************************************************************************/ + +/** Kernel for the running convolution on a rectangle matrix. + * + * @note Supports combinations of 3,5,7 and 9. + */ +class CLConvolutionRectangleKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLConvolutionRectangleKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLConvolutionRectangleKernel(const CLConvolutionRectangleKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLConvolutionRectangleKernel &operator=(const CLConvolutionRectangleKernel &) = delete; + /** Allow instances of this class to be moved */ + CLConvolutionRectangleKernel(CLConvolutionRectangleKernel &&) = default; + /** Allow instances of this class to be moved */ + CLConvolutionRectangleKernel &operator=(CLConvolutionRectangleKernel &&) = default; + /** Initialise the kernel's input, output and border mode. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor, Data types supported: U8, S16. + * @param[in] conv Convolution matrix to apply to the input tensor. + * @param[in] width Width of convolution matrix (Number of columns) + * @param[in] height Height of convolution matrix (Number of rows) + * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */ + void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + BorderSize _border_size; + const ICLTensor *_input; + ICLTensor *_output; +}; +} +#endif /*__ARM_COMPUTE_CLCONVOLUTIONKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLDepthConcatenateKernel.h b/arm_compute/core/CL/kernels/CLDepthConcatenateKernel.h new file mode 100644 index 0000000000..eda4c66883 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLDepthConcatenateKernel.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef __ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H__ +#define __ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the depth concatenate kernel. + * The input tensor will be concatenated into the output tensor. + */ +class CLDepthConcatenateKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLDepthConcatenateKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDepthConcatenateKernel(const CLDepthConcatenateKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDepthConcatenateKernel &operator=(const CLDepthConcatenateKernel &) = delete; + /** Allow instances of this class to be moved */ + CLDepthConcatenateKernel(CLDepthConcatenateKernel &&) = default; + /** Allow instances of this class to be moved */ + CLDepthConcatenateKernel &operator=(CLDepthConcatenateKernel &&) = default; + /** Default destructor */ + ~CLDepthConcatenateKernel() = default; + /** Initialise the kernel's inputs and output + * + * @param[in] input Input tensor. Data types supported: F32. + * @param[in] depth_offset The offset on the Z axis. + * @param[in,out] output Output tensor. Data types supported: F32. + * + * @note: The output tensor's low two dimensions can't be smaller than the input one's. + * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2. 
+ * + */ + void configure(const ICLTensor *input, unsigned int depth_offset, ICLTensor *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + int _top_bottom; + int _left_right; +}; +} +#endif /* __ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLDepthConvertKernel.h b/arm_compute/core/CL/kernels/CLDepthConvertKernel.h new file mode 100644 index 0000000000..2c3b1b8b69 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLDepthConvertKernel.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLDEPTHCONVERTKERNEL_H__ +#define __ARM_COMPUTE_CLDEPTHCONVERTKERNEL_H__ + +#include "arm_compute/core/CL/ICLSimple2DKernel.h" +#include "arm_compute/core/Types.h" + +#include <cstdint> + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the depth conversion kernel. + * + */ +class CLDepthConvertKernel : public ICLSimple2DKernel +{ +public: + /** Set the input and output of the kernel. + * + * Valid conversions Input -> Output : + * + * - U8 -> U16, S16, U32, S32 + * - U16 -> U8, U32, S32 + * - S16 -> U8, U32, S32 + * - U32 -> U8, U16, S16 + * - S32 -> U8, U16, S16 + * + * @param[in] input The input tensor to convert. Data types supported: U8, U16, S16, U32 or S32. + * @param[out] output The output tensor. Data types supported: U8, U16, S16, U32 or S32. + * @param[in] policy Conversion policy + * @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8. + */ + void configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift); +}; +} + +#endif /*__ARM_COMPUTE_CLDEPTHCONVERTKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLDerivativeKernel.h b/arm_compute/core/CL/kernels/CLDerivativeKernel.h new file mode 100644 index 0000000000..17552aefbe --- /dev/null +++ b/arm_compute/core/CL/kernels/CLDerivativeKernel.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLDERIVATIVEKERNEL_H__ +#define __ARM_COMPUTE_CLDERIVATIVEKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the derivative kernel. */ +class CLDerivativeKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLDerivativeKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLDerivativeKernel(const CLDerivativeKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). 
*/ + CLDerivativeKernel &operator=(const CLDerivativeKernel &) = delete; + /** Allow instances of this class to be moved */ + CLDerivativeKernel(CLDerivativeKernel &&) = default; + /** Allow instances of this class to be moved */ + CLDerivativeKernel &operator=(CLDerivativeKernel &&) = default; + /** Default destructor */ + ~CLDerivativeKernel() = default; + /** Initialise the kernel's sources, destination and border + * + * @note At least one of output_x or output_y must be set + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. + * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_input; /**< Input tensor */ + ICLTensor *_output_x; /**< Output tensor - Derivative along the X direction */ + ICLTensor *_output_y; /**< Output tensor - Derivative along the Y direction */ + bool _run_derivative_x; /**< Do we need to run Derivative X ? */ + bool _run_derivative_y; /**< Do we need to run Derivative Y ? */ +}; +} +#endif /*__ARM_COMPUTE_CLDERIVATIVEKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLDilateKernel.h b/arm_compute/core/CL/kernels/CLDilateKernel.h new file mode 100644 index 0000000000..a5d3beb02f --- /dev/null +++ b/arm_compute/core/CL/kernels/CLDilateKernel.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLDILATEKERNEL_H__ +#define __ARM_COMPUTE_CLDILATEKERNEL_H__ + +#include "arm_compute/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the dilate kernel. + * + */ +class CLDilateKernel : public ICLSimple2DKernel +{ +public: + /**Initialise the kernel's input and output. + * + * @param[in] input An input tensor. Data types supported: U8 + * @param[out] output The output tensor. Data types supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */ + void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); + + // Inherited methods overridden: + BorderSize border_size() const override; +}; +} +#endif /*__ARM_COMPUTE_CLDILATEKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLErodeKernel.h b/arm_compute/core/CL/kernels/CLErodeKernel.h new file mode 100644 index 0000000000..a43c925be6 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLErodeKernel.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLERODEKERNEL_H__ +#define __ARM_COMPUTE_CLERODEKERNEL_H__ + +#include "arm_compute/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the erode kernel. + * + */ +class CLErodeKernel : public ICLSimple2DKernel +{ +public: + /**Initialise the kernel's input and output. 
+ * + * @param[in] input An input tensor. Data types supported: U8 + * @param[out] output The output tensor. Data types supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); + + // Inherited methods overridden: + BorderSize border_size() const override; +}; +} +#endif /*__ARM_COMPUTE_CLERODEKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLFastCornersKernel.h b/arm_compute/core/CL/kernels/CLFastCornersKernel.h new file mode 100644 index 0000000000..9817b78ae0 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLFastCornersKernel.h @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLFASTCORNERSKERNEL_H__ +#define __ARM_COMPUTE_CLFASTCORNERSKERNEL_H__ + +#include "arm_compute/core/CL/ICLArray.h" +#include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/core/Types.h" + +#include <cstdint> + +namespace cl +{ +class Buffer; +} + +namespace arm_compute +{ +class ICLTensor; +using ICLImage = ICLTensor; + +/** CL kernel to perform fast corners */ +class CLFastCornersKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLFastCornersKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFastCornersKernel(const CLFastCornersKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFastCornersKernel &operator=(const CLFastCornersKernel &) = delete; + /** Allow instances of this class to be moved */ + CLFastCornersKernel(CLFastCornersKernel &&) = default; + /** Allow instances of this class to be moved */ + CLFastCornersKernel &operator=(CLFastCornersKernel &&) = default; + /** Default destructor */ + ~CLFastCornersKernel() = default; + + /** Initialise the kernel. + * + * @param[in] input Source image. Data types supported: U8. + * @param[out] output Output image. Data types supported: U8. + * @param[in] threshold Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3. + * @param[in] non_max_suppression True if non-maxima suppression is applied, false otherwise. + * @param[in] border_mode Strategy to use for borders.
+ */ + void configure(const ICLImage *input, ICLImage *output, float threshold, bool non_max_suppression, BorderMode border_mode); + + // Inherited methods overridden + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLImage *_input; + ICLImage *_output; +}; + +/** CL kernel to copy keypoints information to ICLKeyPointArray and counts the number of key points */ +class CLCopyToArrayKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLCopyToArrayKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLCopyToArrayKernel(const CLCopyToArrayKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLCopyToArrayKernel &operator=(const CLCopyToArrayKernel &) = delete; + /** Allow instances of this class to be moved */ + CLCopyToArrayKernel(CLCopyToArrayKernel &&) = default; + /** Allow instances of this class to be moved */ + CLCopyToArrayKernel &operator=(CLCopyToArrayKernel &&) = default; + /** Default destructor */ + ~CLCopyToArrayKernel() = default; + + /** Initialise the kernel. + * + * @param[in] input Source image. Data types supported: U8. + * @param[in] update_number Flag to indicate whether we need to update the number of corners + * @param[out] corners Array of keypoints to store the results. + * @param[out] num_buffers Number of keypoints to store the results. 
+ */ + void configure(const ICLImage *input, bool update_number, ICLKeyPointArray *corners, cl::Buffer *num_buffers); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLImage *_input; /**< source image */ + ICLKeyPointArray *_corners; /**< destination array */ + cl::Buffer *_num_buffer; /**< CL memory to record number of key points in the array */ +}; +} +#endif /* __ARM_COMPUTE_CLFASTCORNERSKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLFillBorderKernel.h b/arm_compute/core/CL/kernels/CLFillBorderKernel.h new file mode 100644 index 0000000000..797f86dae8 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLFillBorderKernel.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLFILLBORDERKERNEL_H__ +#define __ARM_COMPUTE_CLFILLBORDERKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for filling the border of a kernel */ +class CLFillBorderKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLFillBorderKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFillBorderKernel(const CLFillBorderKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFillBorderKernel &operator=(const CLFillBorderKernel &) = delete; + /** Allow instances of this class to be moved */ + CLFillBorderKernel(CLFillBorderKernel &&) = default; + /** Allow instances of this class to be moved */ + CLFillBorderKernel &operator=(CLFillBorderKernel &&) = default; + /** Default destructor */ + ~CLFillBorderKernel() = default; + + /** Initialise the kernel's input, output and border mode. + * + * @param[in,out] tensor Tensor to process Data types supported: U8, S16, S32, F32. + * @param[in] border_size Size of the border to fill in elements. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ICLTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue()); + + /** Function to set the constant value on fill border kernel depending on type. + * + * @param[in] idx Index of the kernel argument to set. + * @param[in] constant_border_value Constant value to use for borders if border_mode is set to CONSTANT. 
+ */ + template + void set_constant_border(unsigned int idx, const PixelValue &constant_border_value); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + bool is_parallelisable() const override; + +private: + ICLTensor *_tensor; +}; +} +#endif /*__ARM_COMPUTE_CLFILLBORDERKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h b/arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h new file mode 100644 index 0000000000..3ac7b3c4fa --- /dev/null +++ b/arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLGEMMINTERLEAVE4X4KERNEL_H__ +#define __ARM_COMPUTE_CLGEMMINTERLEAVE4X4KERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel which interleaves the elements of a matrix A in chunk of 4x4 + * + * This function puts the values in a 4x4 block of Matrix A on the same row (Interleaved values) + * + * @f[ + * \left( \begin{array}{cccc} + * a00 & a01 & a02 & a03 \\ + * a10 & a11 & a12 & a13 \\ + * a20 & a21 & a22 & a23 \\ + * a30 & a31 & a32 & a33 \\ + * \end{array} \right) + * \rightarrow + * \left( \begin{array}{ccccccccccccccccc} + * a00 & a10 & a20 & a30 & a01 & a11 & a21 & a31 & a02 & a12 & a22 & a32 & a03 & a13 & a23 & a33 \\ + * \end{array} \right) + * @f] + * + * After this operation, the output matrix will have the following shape: [ height * 4, ceil(width / 4.0f) ] + */ +class CLGEMMInterleave4x4Kernel : public ICLKernel +{ +public: + /** Default constructor */ + CLGEMMInterleave4x4Kernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMInterleave4x4Kernel(const CLGEMMInterleave4x4Kernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMInterleave4x4Kernel &operator=(const CLGEMMInterleave4x4Kernel &) = delete; + /** Allow instances of this class to be moved */ + CLGEMMInterleave4x4Kernel(CLGEMMInterleave4x4Kernel &&) = default; + /** Allow instances of this class to be moved */ + CLGEMMInterleave4x4Kernel &operator=(CLGEMMInterleave4x4Kernel &&) = default; + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data types supported: U8/S8/U16/S16/F16/U32/S32/F32 + * @param[out] output Output tensor. 
Data type supported: same as @p input + */ + void configure(const ICLTensor *input, ICLTensor *output); + + // Inherited methods overridden + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; +}; +} +#endif /* __ARM_COMPUTE_CLGEMMINTERLEAVE4X4KERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h new file mode 100644 index 0000000000..f84d0638da --- /dev/null +++ b/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYKERNEL_H__ +#define __ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to compute low precision matrix multiplication kernel + * + * This kernel performs the following computation: + * -# Convert a values from uint8 to int32 and add a_offset to each of them. + * -# Convert b values from uint8 to int32 and add b_offset to each of them. + * -# Compute the int32 matrix product of the resulting a * b. + * -# Add output_offset to each entry of the result. + * -# Multiply each entry of the result and round to the nearest integer + * -# Clamp the resulting int32 values to the [0..255] range and cast to uint8. + */ +class CLGEMMLowpMatrixMultiplyKernel : public ICLKernel +{ +public: + /** Default Constructor */ + CLGEMMLowpMatrixMultiplyKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMLowpMatrixMultiplyKernel(const CLGEMMLowpMatrixMultiplyKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMLowpMatrixMultiplyKernel &operator=(const CLGEMMLowpMatrixMultiplyKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGEMMLowpMatrixMultiplyKernel(CLGEMMLowpMatrixMultiplyKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGEMMLowpMatrixMultiplyKernel &operator=(CLGEMMLowpMatrixMultiplyKernel &&) = default; + /** Initialise the kernel's input and output. + * + * The input matrices @p input0 and @p input1 must be the output of the kernels: @ref CLGEMMInterleave4x4Kernel and @ref CLGEMMTranspose1xWKernel. + * These two kernels change the layout of the original matrices to be more cache-friendly. + * + * @param[in] input0 Input tensor containing the interleaved Matrix A. 
Data types supported: U8 + * @param[in] input1 Input tensor containing the transposed Matrix B. Data types supported: same as @p input0 + * @param[out] output Output tensor to store the result of matrix multiplication. Data types supported: same as @p input0 + * @param[in] a_offset Offset to be added to each element of the matrix A. + * @param[in] b_offset Offset to be added to each element of the matrix B. + * @param[in] output_offset Offset to be added to each element of the output matrix + * @param[in] output_mult_int Multiplication factor for each element of the output matrix + * @param[in] shift Number of bits to shift right the result. + */ + void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, int32_t a_offset, int32_t b_offset, int32_t output_offset, int32_t output_mult_int, int32_t shift); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input0; + const ICLTensor *_input1; + ICLTensor *_output; +}; +} +#endif /*__ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYKERNEL_H__*/ diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h new file mode 100644 index 0000000000..ea1db9f831 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLGEMMMATRIXACCUMULATEBIASESKERNEL_H__ +#define __ARM_COMPUTE_CLGEMMMATRIXACCUMULATEBIASESKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +/** Interface to add a bias to each row of the input tensor + * + */ +class CLGEMMMatrixAccumulateBiasesKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLGEMMMatrixAccumulateBiasesKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMMatrixAccumulateBiasesKernel(const CLGEMMMatrixAccumulateBiasesKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMMatrixAccumulateBiasesKernel &operator=(const CLGEMMMatrixAccumulateBiasesKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGEMMMatrixAccumulateBiasesKernel(CLGEMMMatrixAccumulateBiasesKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGEMMMatrixAccumulateBiasesKernel &operator=(CLGEMMMatrixAccumulateBiasesKernel &&) = default; + /** Set the accumulate buffer and the biases of the kernel. + * + * @param[in, out] accum The accumulate tensor to convert. Data types supported: F16/F32 + * @param[in] biases The shared biases tensor to append. It must be 1D tensor. Data types supported: Same as @p input + */ + void configure(ICLTensor *accum, const ICLTensor *biases); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + ICLTensor *_accum; + const ICLTensor *_biases; +}; +} + +#endif /*__ARM_COMPUTE_CLGEMMMATRIXACCUMULATEBIASESKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixAdditionKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixAdditionKernel.h new file mode 100644 index 0000000000..c808039567 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLGEMMMatrixAdditionKernel.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLGEMMMATRIXADDITIONKERNEL_H__ +#define __ARM_COMPUTE_CLGEMMMATRIXADDITIONKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to perform the in-place matrix addition between 2 matrices, taking into account that the second matrix might be weighted by a scalar value beta. + * The matrices must have the same dimensions + * + * @note This kernel is computed if and only if beta != 0.0. 
+ */ +class CLGEMMMatrixAdditionKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLGEMMMatrixAdditionKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMMatrixAdditionKernel(const CLGEMMMatrixAdditionKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMMatrixAdditionKernel &operator=(const CLGEMMMatrixAdditionKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGEMMMatrixAdditionKernel(CLGEMMMatrixAdditionKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGEMMMatrixAdditionKernel &operator=(CLGEMMMatrixAdditionKernel &&) = default; + /** Initialise the kernel's input, output and beta value + * + * @note The input and output tensors must have the same dimensions + * + * @param[in] input Input tensor (Matrix C). Data types supported: F16/F32 + * @param[in, out] output Output tensor. If this kernel is used to finalize the GEMM result (alpha * AB + beta * C), output must contain the result obtained by @ref CLGEMMMatrixMultiplyKernel. Data type supported: same as @p input + * @param[in] beta Weight of matrix C + */ + void configure(const ICLTensor *input, ICLTensor *output, float beta); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; +}; +} + +#endif /* __ARM_COMPUTE_CLGEMMMATRIXADDITIONKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h new file mode 100644 index 0000000000..07ea3c12ac --- /dev/null +++ b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLGEMMMATRIXMULTIPLYKERNEL_H__ +#define __ARM_COMPUTE_CLGEMMMATRIXMULTIPLYKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to multiply two input matrices "A" and "B" or to multiply a vector "A" by a matrix "B". All elements of the output matrix/vector will be multiplied by alpha + * + * @note If the output tensor is a matrix, the implementation assumes that the input tensors @p input0 and @p input1 are both matrices and reshaped respectively with @ref CLGEMMInterleave4x4Kernel and @ref CLGEMMTranspose1xWKernel + * @note If the output tensor is a vector and the data type is F32, the implementation assumes that the first input tensor @p input0 is a vector and the second input tensor @p input1 a matrix. 
The implementation also assumes that both tensors have not been reshaped + * + * @attention The second input tensor must have at least 2 dimensions (matrix) + * + */ +class CLGEMMMatrixMultiplyKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLGEMMMatrixMultiplyKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMMatrixMultiplyKernel(const CLGEMMMatrixMultiplyKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMMatrixMultiplyKernel &operator=(const CLGEMMMatrixMultiplyKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGEMMMatrixMultiplyKernel(CLGEMMMatrixMultiplyKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGEMMMatrixMultiplyKernel &operator=(CLGEMMMatrixMultiplyKernel &&) = default; + /** Initialise the kernel's input, output and alpha + * + * @param[in] input0 Input tensor containing the interleaved Matrix A or the vector A. Data types supported: F16/F32 + * @param[in] input1 Input tensor containing the transposed Matrix B if the first input tensor A is not a vector. + * If the output tensor is a vector, input1 must contain the matrix B not reshaped. Data type supported: same as @p input0 + * @param[out] output Output tensor to store the result of matrix multiplication. 
Data type supported: same as @p input0 + * @param[in] alpha Weight of the matrix product + */ + void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, float alpha); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input0; + const ICLTensor *_input1; + ICLTensor *_output; +}; +} +#endif /* __ARM_COMPUTE_CLGEMMMATRIXMULTIPLYKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h b/arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h new file mode 100644 index 0000000000..8d44a4c4fa --- /dev/null +++ b/arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLGEMMTRANSPOSE1XWKERNEL_H__ +#define __ARM_COMPUTE_CLGEMMTRANSPOSE1XWKERNEL_H__ + +#include "arm_compute/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel which transposes the elements of a matrix in chunks of 1x4 if the input data type is F32 or in chunks of 1x8 if the input data type is F16. + * + * Following an example of how the transposition1xW works when the input data type is F32 + * + * @f[ + * \left( \begin{array}{cccc} + * a00 & a01 & a02 & a03 \\ + * a10 & a11 & a12 & a13 \\ + * a20 & a21 & a22 & a23 \\ + * a30 & a31 & a32 & a33 \\ + * \end{array} \right) + * \rightarrow + * \left( \begin{array}{ccccccccccccccccc} + * a00 & a01 & a02 & a03 & a10 & a11 & a12 & a13 & a20 & a21 & a22 & a23 & a30 & a31 & a32 & a33 \\ + * \end{array} \right) + * @f] + * + * Following an example of how the transposition1xW works when the input data type is F16 + * + * @f[ + * \left( \begin{array}{cccccccc} + * a00 & a01 & a02 & a03 & a04 & a05 & a06 & a07 \\ + * a10 & a11 & a12 & a13 & a14 & a15 & a16 & a17 \\ + * a20 & a21 & a22 & a23 & a24 & a25 & a26 & a27 \\ + * a30 & a31 & a32 & a33 & a34 & a35 & a36 & a37 \\ + * \end{array} \right) + * \rightarrow + * \left( \begin{array}{cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc} + * a00 & a01 & a02 & a03 & a04 & a05 & a06 & a07 & a10 & a11 & a12 & a13 & a14 & a15 & a16 & a17 & a20 & a21 & a22 & a23 & a24 & a25 & a26 & a27 & a30 & a31 & a32 & a33 & a34 & a35 & a36 & a37\\ + * \end{array} \right) + * @f] + * + * @note If the input data type is F32, the output matrix will have the following shape: [ height * 4, width / 4 ] + * @note If the input data type is F16, the output matrix will have the following shape: [ height * 8, width / 8 ] + * @note If the input data type is U8, the output matrix will have the following shape: [ height * 16, width / 16 ] + * + */ +class CLGEMMTranspose1xWKernel : public ICLSimple2DKernel +{ +public: + /** Initialise 
the kernel's input and output. + * + * @param[in] input Input tensor. Data types supported: U8/F16/F32 + * @param[out] output Output tensor. Data type supported: same as @p input + */ + void configure(const ICLTensor *input, ICLTensor *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; +}; +} +#endif /* __ARM_COMPUTE_CLGEMMTRANSPOSE1XWKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h b/arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h new file mode 100644 index 0000000000..028a10b421 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLGAUSSIAN3X3KERNEL_H__ +#define __ARM_COMPUTE_CLGAUSSIAN3X3KERNEL_H__ + +#include "arm_compute/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the Gaussian 3x3 filter kernel. + * + */ +class CLGaussian3x3Kernel : public ICLSimple2DKernel +{ +public: + /** Initialise the kernel's input and output. + * + * @param[in] input An input tensor. Data types supported: U8 + * @param[out] output The output tensor. Data types supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); + + // Inherited methods overridden: + BorderSize border_size() const override; +}; +} +#endif /*__ARM_COMPUTE_CLGAUSSIAN3X3KERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h b/arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h new file mode 100644 index 0000000000..1484c06311 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLGAUSSIAN5X5KERNEL_H__ +#define __ARM_COMPUTE_CLGAUSSIAN5X5KERNEL_H__ + +#include "arm_compute/core/CL/kernels/CLConvolutionKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the kernel to run the horizontal pass of 5x5 Gaussian filter on a tensor. */ +class CLGaussian5x5HorKernel : public CLSeparableConvolution5x5HorKernel +{ +public: + /** Initialise the kernel's source, destination and border. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Data types supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); + +private: + //Make the configure method of the parent class private + using CLSeparableConvolution5x5HorKernel::configure; +}; + +/** Interface for the kernel to run the vertical pass of 5x5 Gaussian filter on a tensor. */ +class CLGaussian5x5VertKernel : public CLSeparableConvolution5x5VertKernel +{ +public: + /** Initialise the kernel's source, destination and border. + * + * @param[in] input Input tensor(output of horizontal pass). Data types supported: S16. + * @param[out] output Destination tensor. Data types supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */ + void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); + +private: + //Make the configure method of the parent class private + using CLSeparableConvolution5x5VertKernel::configure; +}; +} +#endif /*__ARM_COMPUTE_CLGAUSSIAN5X5KERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h b/arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h new file mode 100644 index 0000000000..6d79d0e718 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLGAUSSIANPYRAMIDKERNEL_H__ +#define __ARM_COMPUTE_CLGAUSSIANPYRAMIDKERNEL_H__ + +#include "arm_compute/core/CL/ICLSimpleKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to perform a Gaussian filter and half scaling across width (horizontal pass) */ +class CLGaussianPyramidHorKernel : public ICLSimpleKernel +{ +public: + /** Default constructor */ + CLGaussianPyramidHorKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGaussianPyramidHorKernel(const CLGaussianPyramidHorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGaussianPyramidHorKernel &operator=(const CLGaussianPyramidHorKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGaussianPyramidHorKernel(CLGaussianPyramidHorKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGaussianPyramidHorKernel &operator=(CLGaussianPyramidHorKernel &&) = default; + /** Default destructor */ + ~CLGaussianPyramidHorKernel() = default; + + /** Initialise the kernel's source, destination and border mode. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor. Output should have half the input width. Data types supported: U16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */ + void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + BorderSize _border_size; + int _l2_load_offset; +}; + +/** OpenCL kernel to perform a Gaussian filter and half scaling across height (vertical pass) */ +class CLGaussianPyramidVertKernel : public ICLSimpleKernel +{ +public: + /** Default constructor */ + CLGaussianPyramidVertKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGaussianPyramidVertKernel(const CLGaussianPyramidVertKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGaussianPyramidVertKernel &operator=(const CLGaussianPyramidVertKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGaussianPyramidVertKernel(CLGaussianPyramidVertKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGaussianPyramidVertKernel &operator=(CLGaussianPyramidVertKernel &&) = default; + /** Default destructor */ + ~CLGaussianPyramidVertKernel() = default; + + /** Initialise the kernel's source, destination and border mode. + * + * @param[in] input Source tensor. Data types supported: U16. + * @param[out] output Destination tensor. Output should have half the input height. Data types supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */ + void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + int _t2_load_offset; +}; +} +#endif /*__ARM_COMPUTE_CLGAUSSIANPYRAMIDKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h b/arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h new file mode 100644 index 0000000000..45a5aac1bc --- /dev/null +++ b/arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLHOGDESCRIPTORKERNEL_H__ +#define __ARM_COMPUTE_CLHOGDESCRIPTORKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/core/IHOG.h" +#include "arm_compute/core/Size2D.h" + +namespace arm_compute +{ +class ITensor; + +/** OpenCL kernel to perform HOG Orientation Binning */ +class CLHOGOrientationBinningKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLHOGOrientationBinningKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHOGOrientationBinningKernel(const CLHOGOrientationBinningKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHOGOrientationBinningKernel &operator=(const CLHOGOrientationBinningKernel &) = delete; + /** Allow instances of this class to be moved */ + CLHOGOrientationBinningKernel(CLHOGOrientationBinningKernel &&) = default; + /** Allow instances of this class to be moved */ + CLHOGOrientationBinningKernel &operator=(CLHOGOrientationBinningKernel &&) = default; + /** Default destructor */ + ~CLHOGOrientationBinningKernel() = default; + + /** Initialise the kernel's inputs, output and HOG's metadata + * + * @param[in] input_magnitude Input tensor which stores the magnitude of the gradient for each pixel. Data type supported: S16. + * @param[in] input_phase Input tensor which stores the phase of the gradient for each pixel. Data type supported: U8 + * @param[out] output Output tensor which stores the local HOG for each cell. DataType supported: F32. 
Number of channels supported: equal to the number of histogram bins per cell + * @param[in] hog_info HOG's metadata + */ + void configure(const ICLTensor *input_magnitude, const ICLTensor *input_phase, ICLTensor *output, const HOGInfo *hog_info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input_magnitude; + const ICLTensor *_input_phase; + ICLTensor *_output; + Size2D _cell_size; +}; + +/** OpenCL kernel to perform HOG block normalization */ +class CLHOGBlockNormalizationKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLHOGBlockNormalizationKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHOGBlockNormalizationKernel(const CLHOGBlockNormalizationKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHOGBlockNormalizationKernel &operator=(const CLHOGBlockNormalizationKernel &) = delete; + /** Allow instances of this class to be moved */ + CLHOGBlockNormalizationKernel(CLHOGBlockNormalizationKernel &&) = default; + /** Allow instances of this class to be moved */ + CLHOGBlockNormalizationKernel &operator=(CLHOGBlockNormalizationKernel &&) = default; + /** Default destructor */ + ~CLHOGBlockNormalizationKernel() = default; + + /** Initialise the kernel's input, output and HOG's metadata + * + * @param[in] input Input tensor which stores the local HOG for each cell. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per cell + * @param[out] output Output tensor which stores the normalised blocks. Data type supported: F32. 
Number of channels supported: equal to the number of histogram bins per block + * @param[in] hog_info HOG's metadata + */ + void configure(const ICLTensor *input, ICLTensor *output, const HOGInfo *hog_info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + Size2D _num_cells_per_block_stride; +}; +} +#endif /* __ARM_COMPUTE_CLHOGDESCRIPTORKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLHOGDetectorKernel.h b/arm_compute/core/CL/kernels/CLHOGDetectorKernel.h new file mode 100644 index 0000000000..47bd0549ee --- /dev/null +++ b/arm_compute/core/CL/kernels/CLHOGDetectorKernel.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLHOGDETECTORKERNEL_H__ +#define __ARM_COMPUTE_CLHOGDETECTORKERNEL_H__ + +#include "arm_compute/core/CL/ICLArray.h" +#include "arm_compute/core/CL/ICLHOG.h" +#include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/core/CL/OpenCL.h" + +namespace cl +{ +class Buffer; +} + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to perform HOG detector kernel using linear SVM */ +class CLHOGDetectorKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLHOGDetectorKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHOGDetectorKernel(const CLHOGDetectorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHOGDetectorKernel &operator=(const CLHOGDetectorKernel &) = delete; + /** Allow instances of this class to be moved */ + CLHOGDetectorKernel(CLHOGDetectorKernel &&) = default; + /** Allow instances of this class to be moved */ + CLHOGDetectorKernel &operator=(CLHOGDetectorKernel &&) = default; + /** Default destructor */ + ~CLHOGDetectorKernel() = default; + + /** Initialise the kernel's input, HOG data-object, detection window, the stride of the detection window, the threshold and index of the object to detect + * + * @param[in] input Input tensor which stores the HOG descriptor obtained with @ref CLHOGOrientationBinningKernel. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block + * @param[in] hog HOG data object used by @ref CLHOGOrientationBinningKernel and @ref CLHOGBlockNormalizationKernel + * @param[out] detection_windows Array of @ref DetectionWindow. This array stores all the detected objects + * @param[in] num_detection_windows Number of detected objects + * @param[in] detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions. 
+ * It must be multiple of the hog->info()->block_stride() + * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane + * @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to + */ + void configure(const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, cl::Buffer *num_detection_windows, const Size2D &detection_window_stride, float threshold = 0.0f, + uint16_t idx_class = 0); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue); + +private: + const ICLTensor *_input; + ICLDetectionWindowArray *_detection_windows; + cl::Buffer *_num_detection_windows; +}; +} + +#endif /* __ARM_COMPUTE_CLHOGDETECTORKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLHarrisCornersKernel.h b/arm_compute/core/CL/kernels/CLHarrisCornersKernel.h new file mode 100644 index 0000000000..d8057df8d1 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLHarrisCornersKernel.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLHARRISCORNERSKERNEL_H__ +#define __ARM_COMPUTE_CLHARRISCORNERSKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +#include + +namespace arm_compute +{ +class ICLTensor; +using ICLImage = ICLTensor; + +/** Interface for the harris score kernel. + * + * @note The implementation supports 3, 5, and 7 for the block_size. + */ +class CLHarrisScoreKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLHarrisScoreKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHarrisScoreKernel(const CLHarrisScoreKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHarrisScoreKernel &operator=(const CLHarrisScoreKernel &) = delete; + /** Allow instances of this class to be moved */ + CLHarrisScoreKernel(CLHarrisScoreKernel &&) = default; + /** Allow instances of this class to be moved */ + CLHarrisScoreKernel &operator=(CLHarrisScoreKernel &&) = default; + /** Default destructor */ + ~CLHarrisScoreKernel() = default; + + /** Setup the kernel parameters + * + * @param[in] input1 Source image (gradient X). Data types supported S16, S32. (Must be the same as input2) + * @param[in] input2 Source image (gradient Y). Data types supported S16, S32. (Must be the same as input1) + * @param[out] output Destination image (harris score). Data types supported F32 + * @param[in] block_size The block window size used to compute the Harris Corner score. 
Supports: 3, 5 and 7 + * @param[in] norm_factor Normalization factor to use accordingly with the gradient size (Must be different from 0) + * @param[in] strength_thresh Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel). + * @param[in] sensitivity Sensitivity threshold k from the Harris-Stephens equation. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ICLImage *input1, const ICLImage *input2, ICLImage *output, + int32_t block_size, float norm_factor, float strength_thresh, float sensitivity, + bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +protected: + const ICLImage *_input1; /**< Source image - Gx component */ + const ICLImage *_input2; /**< Source image - Gy component */ + ICLImage *_output; /**< Source image - Harris score */ + float _sensitivity; /**< Sensitivity value */ + float _strength_thresh; /**< Threshold value */ + float _norm_factor; /**< Normalization factor */ + BorderSize _border_size; /**< Border size */ +}; +} +#endif /* __ARM_COMPUTE_CLHARRISCORNERSKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLHistogramKernel.h b/arm_compute/core/CL/kernels/CLHistogramKernel.h new file mode 100644 index 0000000000..b65e62d9a2 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLHistogramKernel.h @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLHISTOGRAMKERNEL_H__ +#define __ARM_COMPUTE_CLHISTOGRAMKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLDistribution1D; +class ICLTensor; +using ICLImage = ICLTensor; + +/** Interface to run the histogram kernel. This kernel processes the part of image with width can be divided by 16. 
+ * If the image width is not a multiple of 16, remaining pixels have to be processed with the @ref CLHistogramBorderKernel + */ +class CLHistogramKernel : public ICLKernel +{ +public: + /** Constructor */ + CLHistogramKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHistogramKernel(const CLHistogramKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHistogramKernel &operator=(const CLHistogramKernel &) = delete; + /** Allow instances of this class to be moved */ + CLHistogramKernel(CLHistogramKernel &&) = default; + /** Allow instances of this class to be moved */ + CLHistogramKernel &operator=(CLHistogramKernel &&) = default; + /** Initialise the kernel's input, output and border mode. + * + * @param[in] input Source image. Data types supported: U8. + * @param[out] output Destination distribution. + */ + void configure(const ICLImage *input, ICLDistribution1D *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLImage *_input; + ICLDistribution1D *_output; +}; + +/** Interface to run the histogram kernel to handle the leftover part of image + * + */ +class CLHistogramBorderKernel : public ICLKernel +{ +public: + /** Constructor */ + CLHistogramBorderKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHistogramBorderKernel(const CLHistogramBorderKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHistogramBorderKernel &operator=(const CLHistogramBorderKernel &) = delete; + /** Allow instances of this class to be moved */ + CLHistogramBorderKernel(CLHistogramBorderKernel &&) = default; + /** Allow instances of this class to be moved */ + CLHistogramBorderKernel &operator=(CLHistogramBorderKernel &&) = default; + /** Initialise the kernel's 
input, output and border mode. + * + * @param[in] input Source image. Data types supported: U8. + * @param[out] output Destination distribution. + */ + void configure(const ICLImage *input, ICLDistribution1D *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLImage *_input; + ICLDistribution1D *_output; +}; +} + +#endif /* __ARM_COMPUTE_CLHISTOGRAMKERNEL_H__*/ diff --git a/arm_compute/core/CL/kernels/CLIm2ColKernel.h b/arm_compute/core/CL/kernels/CLIm2ColKernel.h new file mode 100644 index 0000000000..d2224b53e1 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLIm2ColKernel.h @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLIM2COLKERNEL_H__ +#define __ARM_COMPUTE_CLIM2COLKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the im2col reshape kernel. + * + * Rearranges image blocks into columns. It is used to strip out each convolution block to a single column. + * It is used to transform a convolution to a plain matrix multiplication. + * + * For example taking into account the image below and assuming 3x3 image blocks with stride of 1 we have: + * @f[ + * \left( \begin{array}{cccc} + * a00 & a01 & a02 & a03 \\ + * a10 & a11 & a12 & a13 \\ + * a20 & a21 & a22 & a23 \\ + * a30 & a31 & a32 & a33 \\ + * \end{array} \right) + * = + * \left( \begin{array}{ccccccccc} + * a00 & a01 & a02 & a10 & a11 & a12 & a20 & a21 & a22 \\ + * a01 & a02 & a03 & a11 & a12 & a13 & a21 & a22 & a23 \\ + * a10 & a11 & a12 & a20 & a21 & a22 & a30 & a31 & a32 \\ + * a11 & a12 & a13 & a21 & a22 & a23 & a31 & a32 & a33 \\ + * \end{array} \right) + * @f] + */ +class CLIm2ColKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLIm2ColKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLIm2ColKernel(const CLIm2ColKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLIm2ColKernel &operator=(const CLIm2ColKernel &) = delete; + /** Allow instances of this class to be moved */ + CLIm2ColKernel(CLIm2ColKernel &&) = default; + /** Allow instances of this class to be moved */ + CLIm2ColKernel &operator=(CLIm2ColKernel &&) = default; + /** Set the input and output of the kernel. + * + * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F16, F32 + * @param[out] output The output tensor. 
First 2 lower dimensions represent a transform of each 3D input, + * while every dimension above represents a batch. Data types supported: Same as @p input + * @param[in] convolved_dims The convolved output dimensions. + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] has_bias In case biases are provided expands the matrix with 1. + */ + void configure(const ICLTensor *input, ICLTensor *output, std::pair convolved_dims, const PadStrideInfo &conv_info, bool has_bias); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + /** Run the reshape kernel optimised for the special case (stride is 1, padding is 0 and kernel's low 3 dimensions are same as input) + * + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). + * @param[in,out] queue Command queue on which to enqueue the kernel. + */ + void run_reduced(const Window &window, cl::CommandQueue &queue); + /** run the generic convolution layer input reshape kernel + * + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). + * @param[in,out] queue Command queue on which to enqueue the kernel. 
+ */ + void run_generic(const Window &window, cl::CommandQueue &queue); + + /** Common signature for the kernel to run */ + using Im2ColFunction = void (CLIm2ColKernel::*)(const Window &, cl::CommandQueue &); + +private: + const ICLTensor *_input; + ICLTensor *_output; + std::pair _convolved_dims; + PadStrideInfo _conv_info; + int _kernel_size; + unsigned int _num_elems_processed_per_iteration; + Im2ColFunction _run_func; +}; +} + +#endif /*__ARM_COMPUTE_CLIM2COLKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLIntegralImageKernel.h b/arm_compute/core/CL/kernels/CLIntegralImageKernel.h new file mode 100644 index 0000000000..0f53c2d2a8 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLIntegralImageKernel.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLINTEGRALIMAGEKERNEL_H__ +#define __ARM_COMPUTE_CLINTEGRALIMAGEKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface to run the horizontal pass of the integral image kernel. */ +class CLIntegralImageHorKernel : public ICLSimple2DKernel +{ +public: + /** Initialise the kernel's input and output. + * + * @param[in] input An input tensor. Data types supported: U8 + * @param[out] output Destination tensor, Data types supported: U32. + */ + void configure(const ICLTensor *input, ICLTensor *output); +}; + +/** Interface to run the vertical pass of the integral image kernel. */ +class CLIntegralImageVertKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLIntegralImageVertKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLIntegralImageVertKernel(const CLIntegralImageVertKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLIntegralImageVertKernel &operator=(const CLIntegralImageVertKernel &) = delete; + /** Allow instances of this class to be moved */ + CLIntegralImageVertKernel(CLIntegralImageVertKernel &&) = default; + /** Allow instances of this class to be moved */ + CLIntegralImageVertKernel &operator=(CLIntegralImageVertKernel &&) = default; + /** Initialise the kernel's input and output. + * + * @param[in,out] in_out The input/output tensor. 
Data types supported: U32 + */ + void configure(ICLTensor *in_out); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + ICLTensor *_in_out; +}; +} +#endif /*__ARM_COMPUTE_CLINTEGRALIMAGEKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLLKTrackerKernel.h b/arm_compute/core/CL/kernels/CLLKTrackerKernel.h new file mode 100644 index 0000000000..4d0dbed55d --- /dev/null +++ b/arm_compute/core/CL/kernels/CLLKTrackerKernel.h @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLLKTRACKERKERNEL_H__ +#define __ARM_COMPUTE_CLLKTRACKERKERNEL_H__ + +#include "arm_compute/core/CL/ICLArray.h" +#include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/core/Types.h" + +#include +#include + +namespace arm_compute +{ +class ICLTensor; + +/** Internal keypoint structure for Lucas-Kanade Optical Flow */ +struct CLLKInternalKeypoint +{ + float x{ 0.f }; /**< x coordinate of the keypoint */ + float y{ 0.f }; /**< y coordinate of the keypoint */ + float tracking_status{ 0.f }; /**< the tracking status of the keypoint */ + float dummy{ 0.f }; /**< Dummy field, to make sure the data structure 128-bit align, so that GPU can use vload4 */ +}; + +/** Structure for storing Spatial Gradient Matrix and the minimum eigenvalue for each keypoint */ +struct CLCoefficientTable +{ + float A11; /**< iA11 * FLT_SCALE */ + float A12; /**< iA11 * FLT_SCALE */ + float A22; /**< iA11 * FLT_SCALE */ + float min_eig; /**< Minimum eigenvalue */ +}; + +/** Structure for storing ival, ixval and iyval for each point inside the window */ +struct CLOldValue +{ + int16_t ival; /**< ival extracts from old image */ + int16_t ixval; /**< ixval extracts from scharr Gx image */ + int16_t iyval; /**< iyval extracts from scharr Gy image */ + int16_t dummy; /**< Dummy field, to make sure the data structure 128-bit align, so that GPU can use vload4 */ +}; + +using ICLLKInternalKeypointArray = ICLArray; +using ICLCoefficientTableArray = ICLArray; +using ICLOldValArray = ICLArray; + +/** Interface to run the initialization step of LKTracker */ +class CLLKTrackerInitKernel : public ICLKernel +{ +public: + /** Initialise the kernel input and output + * + * @param[in] old_points Pointer to the @ref ICLKeyPointArray storing old key points + * @param[in] new_points_estimates Pointer to the @ref ICLKeyPointArray storing new estimates key points + * @param[out] old_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint old points + * 
@param[out] new_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint new points + * @param[in] use_initial_estimate The flag to indicate whether the initial estimated position should be used + * @param[in] level The pyramid level + * @param[in] num_levels The number of pyramid levels + * @param[in] pyramid_scale Scale factor used for generating the pyramid + */ + void configure(const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates, + ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal, + bool use_initial_estimate, size_t level, size_t num_levels, float pyramid_scale); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; +}; + +/** Interface to run the finalize step of LKTracker, where it truncates the coordinates stored in new_points array */ +class CLLKTrackerFinalizeKernel : public ICLKernel +{ +public: + /** Initialise the kernel input and output + * + * @param[in] new_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint new points + * @param[out] new_points Pointer to the @ref ICLKeyPointArray storing new key points + */ + void configure(ICLLKInternalKeypointArray *new_points_internal, ICLKeyPointArray *new_points); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; +}; + +/** Interface to run the first stage of LKTracker, where A11, A12, A22, min_eig, ival, ixval and iyval are computed */ +class CLLKTrackerStage0Kernel : public ICLKernel +{ +public: + /** Default constructor */ + CLLKTrackerStage0Kernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLLKTrackerStage0Kernel(const CLLKTrackerStage0Kernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLLKTrackerStage0Kernel &operator=(const CLLKTrackerStage0Kernel &) = delete; + /** 
Allow instances of this class to be moved */ + CLLKTrackerStage0Kernel(CLLKTrackerStage0Kernel &&) = default; + /** Allow instances of this class to be moved */ + CLLKTrackerStage0Kernel &operator=(CLLKTrackerStage0Kernel &&) = default; + /** Initialise the kernel input and output + * + * @param[in] old_input Pointer to the input old tensor. Data types supported: U8 + * @param[in] old_scharr_gx Pointer to the input scharr X tensor. Data types supported: S16 + * @param[in] old_scharr_gy Pointer to the input scharr Y tensor. Data types supported: S16 + * @param[in] old_points_internal Pointer to the array of CLLKInternalKeypoint old points + * @param[in, out] new_points_internal Pointer to the array of CLLKInternalKeypoint new points + * @param[out] coeff_table Pointer to the array holding the Spatial Gradient coefficients + * @param[out] old_ival Pointer to the array holding internal values + * @param[in] window_dimension The size of the window on which to perform the algorithm + * @param[in] level The pyramid level + */ + void configure(const ICLTensor *old_input, const ICLTensor *old_scharr_gx, const ICLTensor *old_scharr_gy, + ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal, + ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival, + size_t window_dimension, size_t level); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_old_input; + const ICLTensor *_old_scharr_gx; + const ICLTensor *_old_scharr_gy; +}; + +/** Interface to run the second stage of LKTracker, where the motion vectors of the given points are computed */ +class CLLKTrackerStage1Kernel : public ICLKernel +{ +public: + /** Default constructor */ + CLLKTrackerStage1Kernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLLKTrackerStage1Kernel(const CLLKTrackerStage1Kernel &) = delete; + /** Prevent instances 
of this class from being copied (As this class contains pointers) */ + CLLKTrackerStage1Kernel &operator=(const CLLKTrackerStage1Kernel &) = delete; + /** Allow instances of this class to be moved */ + CLLKTrackerStage1Kernel(CLLKTrackerStage1Kernel &&) = default; + /** Allow instances of this class to be moved */ + CLLKTrackerStage1Kernel &operator=(CLLKTrackerStage1Kernel &&) = default; + /** Initialise the kernel input and output + * + * @param[in] new_input Pointer to the input new tensor. Data types supported: U8 + * @param[in, out] new_points_internal Pointer to the array of CLLKInternalKeypoint for new points + * @param[in] coeff_table Pointer to the array holding the Spatial Gradient coefficients + * @param[in] old_ival Pointer to the array holding internal values + * @param[in] termination The criteria to terminate the search of each keypoint. + * @param[in] epsilon The error for terminating the algorithm + * @param[in] num_iterations The maximum number of iterations before terminating the algorithm + * @param[in] window_dimension The size of the window on which to perform the algorithm + * @param[in] level The pyramid level + */ + void configure(const ICLTensor *new_input, ICLLKInternalKeypointArray *new_points_internal, ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival, + Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, size_t level); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_new_input; +}; +} +#endif /*__ARM_COMPUTE_CLLKTRACKERKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h b/arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h new file mode 100644 index 0000000000..fda0327461 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLLOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H__ +#define __ARM_COMPUTE_CLLOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel to multiply each row of first tensor with low 2 dimensions of second tensor. 
+ * + * @attention The second input tensor must have at least 2 dimensions (matrix) + * + */ +class CLLocallyConnectedMatrixMultiplyKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLLocallyConnectedMatrixMultiplyKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLLocallyConnectedMatrixMultiplyKernel(const CLLocallyConnectedMatrixMultiplyKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLLocallyConnectedMatrixMultiplyKernel &operator=(const CLLocallyConnectedMatrixMultiplyKernel &) = delete; + /** Allow instances of this class to be moved */ + CLLocallyConnectedMatrixMultiplyKernel(CLLocallyConnectedMatrixMultiplyKernel &&) = default; + /** Allow instances of this class to be moved */ + CLLocallyConnectedMatrixMultiplyKernel &operator=(CLLocallyConnectedMatrixMultiplyKernel &&) = default; + /** Initialise the kernel's input, output and alpha + * + * @param[in] input0 First input tensor. Data types supported: F32 + * @param[in] input1 Second input tensor. Data type supported: same as @p input0 + * @param[out] output Output tensor to store the result. Data type supported: same as @p input0 + */ + void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input0; + const ICLTensor *_input1; + ICLTensor *_output; +}; +} +#endif /* __ARM_COMPUTE_CLLOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h b/arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h new file mode 100644 index 0000000000..a8e1dcb361 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLMAGNITUDEPHASEKERNEL_H__ +#define __ARM_COMPUTE_CLMAGNITUDEPHASEKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Template interface for the kernel to compute magnitude and phase. + * + */ +class CLMagnitudePhaseKernel : public ICLKernel +{ +public: + /** Default constructor. */ + CLMagnitudePhaseKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLMagnitudePhaseKernel(const CLMagnitudePhaseKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). 
*/ + CLMagnitudePhaseKernel &operator=(const CLMagnitudePhaseKernel &) = delete; + /** Allow instances of this class to be moved */ + CLMagnitudePhaseKernel(CLMagnitudePhaseKernel &&) = default; + /** Allow instances of this class to be moved */ + CLMagnitudePhaseKernel &operator=(CLMagnitudePhaseKernel &&) = default; + /** Initialise the kernel's input, output. + * + * @note At least one of output1 or output2 must be set. + * + * @param[in] gx The input gradient X tensor. Data types supported: S16. + * @param[in] gy The input gradient Y tensor. Data types supported: S16. + * @param[out] magnitude (Optional) The output tensor - Magnitude. Data types supported: S16. + * @param[out] phase (Optional) The output tensor - Phase. Data types supported: U8. + * @param[in] mag_type (Optional) Magnitude calculation type. Default: L2NORM. + * @param[in] phase_type (Optional) Phase calculation type. Default: SIGNED. + */ + void configure(const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, + MagnitudeType mag_type = MagnitudeType::L2NORM, PhaseType phase_type = PhaseType::SIGNED); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_gx; /**< Input gradient X. */ + const ICLTensor *_gy; /**< Input gradient Y. */ + ICLTensor *_magnitude; /**< Output - Magnitude. */ + ICLTensor *_phase; /**< Output - Phase. */ + bool _run_mag; /**< Calculate magnitude ? */ + bool _run_phase; /**< Calculate phase ? */ +}; +} + +#endif /* __ARM_COMPUTE_CLMAGNITUDEPHASEKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLMeanStdDevKernel.h b/arm_compute/core/CL/kernels/CLMeanStdDevKernel.h new file mode 100644 index 0000000000..9f30f76e1b --- /dev/null +++ b/arm_compute/core/CL/kernels/CLMeanStdDevKernel.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLMEANSTDDEVKERNEL_H__ +#define __ARM_COMPUTE_CLMEANSTDDEVKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace cl +{ +class Buffer; +} + +namespace arm_compute +{ +class ICLTensor; +using ICLImage = ICLTensor; + +/** Interface for the kernel to calculate mean and standard deviation of input image pixels. 
*/ +class CLMeanStdDevKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLMeanStdDevKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLMeanStdDevKernel(const CLMeanStdDevKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLMeanStdDevKernel &operator=(const CLMeanStdDevKernel &) = delete; + /** Allow instances of this class to be moved */ + CLMeanStdDevKernel(CLMeanStdDevKernel &&) = default; + /** Allow instances of this class to be moved */ + CLMeanStdDevKernel &operator=(CLMeanStdDevKernel &&) = default; + /** Initialise the kernel's input and outputs. + * + * @param[in] input Input image. Data types supported: U8. + * @param[out] mean Input average pixel value. + * @param[out] global_sum Keeps global sum of pixel values (Buffer size: 1 cl_ulong). + * @param[out] stddev (Optional) Output standard deviation of pixel values. + * @param[out] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values (Buffer size: 1 cl_ulong). + */ + void configure(const ICLImage *input, float *mean, cl::Buffer *global_sum, float *stddev = nullptr, cl::Buffer *global_sum_squared = nullptr); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLImage *_input; + float *_mean; + float *_stddev; + cl::Buffer *_global_sum; + cl::Buffer *_global_sum_squared; +}; +} +#endif /* __ARM_COMPUTE_CLMEANSTDDEVKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLMedian3x3Kernel.h b/arm_compute/core/CL/kernels/CLMedian3x3Kernel.h new file mode 100644 index 0000000000..5af364b6c6 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLMedian3x3Kernel.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLMEDIAN3X3KERNEL_H__ +#define __ARM_COMPUTE_CLMEDIAN3X3KERNEL_H__ + +#include "arm_compute/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the median 3x3 filter kernel. + * + */ +class CLMedian3x3Kernel : public ICLSimple2DKernel +{ +public: + /** Initialise the kernel's input and output. + * + * @param[in] input An input tensor. Data types supported: U8 + * @param[out] output The output tensor. Data types supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */ + void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); + + // Inherited methods overridden: + BorderSize border_size() const override; +}; +} +#endif /*__ARM_COMPUTE_CLMEDIAN3X3KERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h b/arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h new file mode 100644 index 0000000000..6a31f3cf18 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+#ifndef __ARM_COMPUTE_CLMINMAXLOCATIONKERNEL_H__
+#define __ARM_COMPUTE_CLMINMAXLOCATIONKERNEL_H__
+
+#include "arm_compute/core/CL/ICLArray.h"
+#include "arm_compute/core/CL/ICLKernel.h"
+
+#include <array>
+
+namespace arm_compute
+{
+class ICLTensor;
+using ICLImage = ICLTensor;
+
+/** Interface for the kernel to perform min max search on an image.
+ */
+class CLMinMaxKernel : public ICLKernel
+{
+public:
+    /** Default constructor */
+    CLMinMaxKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLMinMaxKernel(const CLMinMaxKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLMinMaxKernel &operator=(const CLMinMaxKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    CLMinMaxKernel(CLMinMaxKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    CLMinMaxKernel &operator=(CLMinMaxKernel &&) = default;
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  input   Input Image. Data types supported: U8 or S16.
+     * @param[out] min_max Buffer of 2 elements to store the min value at position 0 and the max value at position 1. Data type supported: S32.
+     */
+    void configure(const ICLImage *input, cl::Buffer *min_max);
+
+    // Inherited methods overridden:
+    void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+    const ICLTensor   *_input;             /**< Input image. */
+    cl::Buffer        *_min_max;           /**< Minimum/maximum value. */
+    std::array<int, 2> _data_type_max_min; /**< Maximum and minimum data type value respectively. */
+};
+
+/** Interface for the kernel to find min max locations of an image. 
+ */ +class CLMinMaxLocationKernel : public ICLKernel +{ +public: + /** Constructor */ + CLMinMaxLocationKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLMinMaxLocationKernel(const CLMinMaxLocationKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLMinMaxLocationKernel &operator=(const CLMinMaxLocationKernel &) = delete; + /** Allow instances of this class to be moved */ + CLMinMaxLocationKernel(CLMinMaxLocationKernel &&) = default; + /** Allow instances of this class to be moved */ + CLMinMaxLocationKernel &operator=(CLMinMaxLocationKernel &&) = default; + /** Initialise the kernel's input and outputs. + * + * @note When locations of min and max occurrences are requested, the reported number of locations is limited to the given array size. + * + * @param[in] input Input image. Data types supported: U8 or S16. + * @param[in] min_max Buffer of 2 elements which contains the min value at position 0 and the max value at position 1. Data type supported: S32 + * @param[out] min_max_count Buffer of 2 elements to store the min value occurrences at position 0 and the max value occurrences at position 1. Data type supported: S32 + * @param[out] min_loc (Optional) Array of Coordinates2D used to store minimum value locations. + * @param[out] max_loc (Optional) Array of Coordinates2D used to store maximum value locations. + */ + void configure(const ICLImage *input, cl::Buffer *min_max, cl::Buffer *min_max_count, + ICLCoordinates2DArray *min_loc = nullptr, ICLCoordinates2DArray *max_loc = nullptr); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLImage *_input; /**< Input image. */ + cl::Buffer *_min_max_count; /**< Minimum/maximum value occurrences. 
*/ +}; +} +#endif /*__ARM_COMPUTE_CLMINMAXLOCATIONKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h b/arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h new file mode 100644 index 0000000000..0c59063bbc --- /dev/null +++ b/arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+#ifndef __ARM_COMPUTE_CLNONLINEARFILTERKERNEL_H__
+#define __ARM_COMPUTE_CLNONLINEARFILTERKERNEL_H__
+
+#include "arm_compute/core/CL/ICLSimple2DKernel.h"
+#include "arm_compute/core/Types.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the kernel to apply a non-linear filter */
+class CLNonLinearFilterKernel : public ICLSimple2DKernel
+{
+public:
+    /** Default constructor */
+    CLNonLinearFilterKernel();
+    /** Set the source, destination and border mode of the kernel
+     *
+     * @param[in]  input            Source tensor. Data types supported: U8
+     * @param[out] output           Destination tensor. Data types supported: U8
+     * @param[in]  function         Non linear function to perform
+     * @param[in]  mask_size        Mask size. Supported sizes: 3, 5
+     * @param[in]  pattern          Mask pattern
+     * @param[in]  mask             The given mask. Will be used only if pattern is specified to PATTERN_OTHER
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(const ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function,
+                   unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask,
+                   bool border_undefined);
+
+    // Inherited methods overridden:
+    BorderSize border_size() const override;
+
+private:
+    BorderSize _border_size; /**< Border size */
+};
+}
+#endif /*__ARM_COMPUTE_CLNONLINEARFILTERKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h b/arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h
new file mode 100644
index 0000000000..1719bbbb47
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLNONMAXIMASUPPRESSION3x3KERNEL_H__ +#define __ARM_COMPUTE_CLNONMAXIMASUPPRESSION3x3KERNEL_H__ + +#include "arm_compute/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface to perform Non-Maxima suppression over a 3x3 window using OpenCL + * + * @note Used by @ref CLFastCorners and @ref CLHarrisCorners + */ +class CLNonMaximaSuppression3x3Kernel : public ICLSimple2DKernel +{ +public: + /** Initialise the kernel's sources, destinations and border mode. + * + * @param[in] input Source tensor. Data types supported: U8, F32. (Must be the same as the output tensor) + * @param[out] output Destination tensor. Data types supported: U8, F32. (Must be the same as the input tensor) + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */ + void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); + + // Inherited methods overridden: + BorderSize border_size() const override; +}; +} +#endif /* __ARM_COMPUTE_CLNONMAXIMASUPPRESSION3x3KERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h b/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h new file mode 100644 index 0000000000..ca9034b162 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLNORMALIZATIONLAYERKERNEL_H__ +#define __ARM_COMPUTE_CLNORMALIZATIONLAYERKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the normalization layer kernel. 
+ */ +class CLNormalizationLayerKernel : public ICLKernel +{ +public: + /** Constructor */ + CLNormalizationLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLNormalizationLayerKernel(const CLNormalizationLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLNormalizationLayerKernel &operator=(const CLNormalizationLayerKernel &) = delete; + /** Default Move Constructor. */ + CLNormalizationLayerKernel(CLNormalizationLayerKernel &&) = default; + /** Default move assignment operator. */ + CLNormalizationLayerKernel &operator=(CLNormalizationLayerKernel &&) = default; + + /** Set the input and output tensors. + * + * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], + * and an optional 4th dimension for batch of inputs. Data types supported: F16, F32. + * @param[in] squared_input Source with each element has been squared. 3 lower dims represent a single input with dimensions [width, height, IFM], + * Data types should match the input type. + * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data types should match the input type. + * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters. 
+ */ + void configure(const ICLTensor *input, const ICLTensor *squared_input, ICLTensor *output, NormalizationLayerInfo norm_info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_input; + const ICLTensor *_squared_input; + ICLTensor *_output; + BorderSize _border_size; +}; +} +#endif /*__ARM_COMPUTE_CLNORMALIZATIONLAYERKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h b/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h new file mode 100644 index 0000000000..6fbbe95219 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
 */
#ifndef __ARM_COMPUTE_CLPIXELWISEMULTIPLICATIONKERNEL_H__
#define __ARM_COMPUTE_CLPIXELWISEMULTIPLICATIONKERNEL_H__

#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/Types.h"

namespace arm_compute
{
class ICLTensor;

/** Interface for the pixelwise multiplication kernel. */
class CLPixelWiseMultiplicationKernel : public ICLKernel
{
public:
    /** Default constructor. */
    CLPixelWiseMultiplicationKernel();
    /** Prevent instances of this class from being copied (As this class contains pointers). */
    CLPixelWiseMultiplicationKernel(const CLPixelWiseMultiplicationKernel &) = delete;
    /** Prevent instances of this class from being copied (As this class contains pointers). */
    CLPixelWiseMultiplicationKernel &operator=(const CLPixelWiseMultiplicationKernel &) = delete;
    /** Allow instances of this class to be moved */
    CLPixelWiseMultiplicationKernel(CLPixelWiseMultiplicationKernel &&) = default;
    /** Allow instances of this class to be moved */
    CLPixelWiseMultiplicationKernel &operator=(CLPixelWiseMultiplicationKernel &&) = default;
    /** Initialise the kernel's input, output and border mode.
     *
     * @param[in]  input1          An input tensor. Data types supported: U8, S16, F16, F32.
     * @param[in]  input2          An input tensor. Data types supported: U8, S16, F16, F32.
     * @param[out] output          The output tensor. Data types supported: U8 (Only if both inputs are U8), S16, F16, F32.
     * @param[in]  scale           Scale to apply after multiplication.
     *                             Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
     * @param[in]  overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
     * @param[in]  rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even.
     */
    void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float scale,
                   ConvertPolicy overflow_policy, RoundingPolicy rounding_policy);

    // Inherited methods overridden:
    void run(const Window &window, cl::CommandQueue &queue) override;

private:
    const ICLTensor *_input1; /**< First input tensor */
    const ICLTensor *_input2; /**< Second input tensor */
    ICLTensor       *_output; /**< Output tensor */
};
}

#endif /*__ARM_COMPUTE_CLPIXELWISEMULTIPLICATIONKERNEL_H__ */
/*
 * Copyright (c) 2017 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef __ARM_COMPUTE_CLPOOLINGLAYERKERNEL_H__
#define __ARM_COMPUTE_CLPOOLINGLAYERKERNEL_H__

#include "arm_compute/core/CL/ICLKernel.h"

namespace arm_compute
{
class ICLTensor;

/** Interface for the pooling layer kernel */
// NOTE(review): PoolingLayerInfo is referenced below but only ICLKernel.h is included
// directly — presumably Types.h is pulled in transitively; confirm.
class CLPoolingLayerKernel : public ICLKernel
{
public:
    /** Default constructor */
    CLPoolingLayerKernel();
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    CLPoolingLayerKernel(const CLPoolingLayerKernel &) = delete;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    CLPoolingLayerKernel &operator=(const CLPoolingLayerKernel &) = delete;
    /** Allow instances of this class to be moved */
    CLPoolingLayerKernel(CLPoolingLayerKernel &&) = default;
    /** Allow instances of this class to be moved */
    CLPoolingLayerKernel &operator=(CLPoolingLayerKernel &&) = default;
    /** Default destructor */
    ~CLPoolingLayerKernel() = default;

    /** Set the input and output tensors.
     *
     * @param[in]  input     Source tensor. Data types supported: F16, F32.
     * @param[out] output    Destination tensor. Data types supported: Same as @p input.
     * @param[in]  pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
     */
    void configure(const ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info);

    // Inherited methods overridden:
    void run(const Window &window, cl::CommandQueue &queue) override;
    BorderSize border_size() const override;

private:
    const ICLTensor *_input;       /**< Source tensor */
    ICLTensor       *_output;      /**< Destination tensor */
    PoolingLayerInfo _pool_info;   /**< Pooling operation information */
    BorderSize       _border_size; /**< Border size */
};
}
#endif /*__ARM_COMPUTE_CLPOOLINGLAYERKERNEL_H__ */
/*
 * Copyright (c) 2017 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef __ARM_COMPUTE_CLREMAPKERNEL_H__
#define __ARM_COMPUTE_CLREMAPKERNEL_H__

#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/Types.h"

namespace arm_compute
{
class ICLTensor;

/** OpenCL kernel to perform a remap on a tensor */
class CLRemapKernel : public ICLKernel
{
public:
    /** Default constructor */
    CLRemapKernel();
    /** Prevent instances of this class from being copied (As this class contains pointers). */
    CLRemapKernel(const CLRemapKernel &) = delete;
    /** Prevent instances of this class from being copied (As this class contains pointers). */
    CLRemapKernel &operator=(const CLRemapKernel &) = delete;
    /** Allow instances of this class to be moved */
    CLRemapKernel(CLRemapKernel &&) = default;
    /** Allow instances of this class to be moved */
    CLRemapKernel &operator=(CLRemapKernel &&) = default;
    /** Initialize the kernel's input, output and border mode.
     *
     * @param[in]  input            Source tensor. Data types supported: U8.
     * @param[in]  map_x            Map for X coordinates. Data types supported: F32.
     * @param[in]  map_y            Map for Y coordinates. Data types supported: F32.
     * @param[out] output           Destination tensor. Data types supported: U8. All but the lowest two dimensions must be the same size as in the input tensor, i.e. remapping is only performed within the XY-plane.
     * @param[in]  policy           The interpolation type.
     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
     */
    void configure(const ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, InterpolationPolicy policy, bool border_undefined);

    // Inherited methods overridden:
    void run(const Window &window, cl::CommandQueue &queue) override;
    BorderSize border_size() const override;

private:
    const ICLTensor *_input;  /**< Source tensor */
    ICLTensor       *_output; /**< Destination tensor */
    const ICLTensor *_map_x;  /**< Map for X coordinates */
    const ICLTensor *_map_y;  /**< Map for Y coordinates */
};
}
#endif /*__ARM_COMPUTE_CLREMAPKERNEL_H__ */
/*
 * Copyright (c) 2016, 2017 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef __ARM_COMPUTE_CLSCALEKERNEL_H__
#define __ARM_COMPUTE_CLSCALEKERNEL_H__

#include "arm_compute/core/CL/ICLSimple2DKernel.h"
#include "arm_compute/core/Types.h"

namespace arm_compute
{
class ICLTensor;

/** Interface for the scale kernel. */
// (Fixed copy-paste error: previous comment said "warp affine kernel", but this class scales a tensor.)
class CLScaleKernel : public ICLSimple2DKernel
{
public:
    /** Initialise the kernel's inputs, output and interpolation policy
     *
     * @note dx, dy and offsets have the same dimensions (width and height) of the output tensor
     *
     * @param[in]  input            Source tensor. Data types supported: U8, S16.
     * @param[out] output           Destination tensor. Data types supported: U8, S16 (Must be the same as the input tensor).
     *                              All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
     * @param[in]  policy           Interpolation type to use
     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
     */
    void configure(const ICLTensor *input, ICLTensor *output, InterpolationPolicy policy, bool border_undefined);

    // Inherited methods overridden:
    BorderSize border_size() const override;
};
}

#endif /*__ARM_COMPUTE_CLSCALEKERNEL_H__ */
/*
 * Copyright (c) 2016, 2017 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef __ARM_COMPUTE_CLSCHARR3X3KERNEL_H__
#define __ARM_COMPUTE_CLSCHARR3X3KERNEL_H__

#include "arm_compute/core/CL/ICLKernel.h"

namespace arm_compute
{
class ICLTensor;

/** Interface for the kernel to run a 3x3 Scharr filter on a tensor.
 *
 * @f[
 *      \mathbf{G}_x=\begin{vmatrix}
 *      -3 & 0 & +3\\
 *      -10& 0 & +10\\
 *      -3 & 0 & +3
 *      \end{vmatrix}
 * @f]
 * @f[
 *      \mathbf{G}_y=\begin{vmatrix}
 *      -3 & -10 & -3\\
 *       0 &  0 & 0\\
 *      +3 & +10 & +3
 *      \end{vmatrix}
 * @f]
 */
class CLScharr3x3Kernel : public ICLKernel
{
public:
    /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */
    CLScharr3x3Kernel();
    /** Prevent instances of this class from being copied (As this class contains pointers). */
    CLScharr3x3Kernel(const CLScharr3x3Kernel &) = delete;
    /** Prevent instances of this class from being copied (As this class contains pointers). */
    CLScharr3x3Kernel &operator=(const CLScharr3x3Kernel &) = delete;
    /** Allow instances of this class to be moved */
    CLScharr3x3Kernel(CLScharr3x3Kernel &&) = default;
    /** Allow instances of this class to be moved */
    CLScharr3x3Kernel &operator=(CLScharr3x3Kernel &&) = default;
    /** Initialise the kernel's source, destination and border.
     *
     * @note At least one of output_x or output_y must be set.
     *
     * @param[in]  input            Source tensor. Data types supported: U8.
     * @param[out] output_x         (Optional) Destination tensor for the X gradient, Data types supported: S16.
     * @param[out] output_y         (Optional) Destination tensor for the Y gradient, Data types supported: S16.
     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
     */
    void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);

    // Inherited methods overridden:
    void run(const Window &window, cl::CommandQueue &queue) override;
    BorderSize border_size() const override;

private:
    bool             _run_scharr_x; /**< Do we need to run Scharr X ? */
    bool             _run_scharr_y; /**< Do we need to run Scharr Y ? */
    const ICLTensor *_input;        /**< Input image */
    ICLTensor       *_output_x;     /**< Output image for scharr X */
    ICLTensor       *_output_y;     /**< Output image for scharr Y */
};
}
#endif /*__ARM_COMPUTE_CLSCHARR3X3KERNEL_H__ */
/*
 * Copyright (c) 2016, 2017 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef __ARM_COMPUTE_CLSOBEL3X3KERNEL_H__
#define __ARM_COMPUTE_CLSOBEL3X3KERNEL_H__

#include "arm_compute/core/CL/ICLKernel.h"

namespace arm_compute
{
class ICLTensor;

/** Interface for the kernel to run a 3x3 Sobel filter on a tensor. */
class CLSobel3x3Kernel : public ICLKernel
{
public:
    /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */
    CLSobel3x3Kernel();
    /** Prevent instances of this class from being copied (As this class contains pointers). */
    CLSobel3x3Kernel(const CLSobel3x3Kernel &) = delete;
    /** Prevent instances of this class from being copied (As this class contains pointers). */
    CLSobel3x3Kernel &operator=(const CLSobel3x3Kernel &) = delete;
    /** Allow instances of this class to be moved */
    CLSobel3x3Kernel(CLSobel3x3Kernel &&) = default;
    /** Allow instances of this class to be moved */
    CLSobel3x3Kernel &operator=(CLSobel3x3Kernel &&) = default;
    /** Default destructor */
    ~CLSobel3x3Kernel() = default;
    /** Initialise the kernel's source, destination and border.
     *
     * @note At least one of output_x or output_y must be set.
     *
     * @param[in]  input            Source tensor. Data types supported: U8.
     * @param[out] output_x         (Optional) Destination tensor for the X gradient, Data types supported: S16.
     * @param[out] output_y         (Optional) Destination tensor for the Y gradient, Data types supported: S16.
     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
     */
    void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);

    // Inherited methods overridden:
    void run(const Window &window, cl::CommandQueue &queue) override;
    BorderSize border_size() const override;

private:
    const ICLTensor *_input;       /**< Input tensor */
    ICLTensor       *_output_x;    /**< Output tensor for Sobel X */
    ICLTensor       *_output_y;    /**< Output tensor for Sobel Y */
    bool             _run_sobel_x; /**< Do we need to run Sobel X ? */
    bool             _run_sobel_y; /**< Do we need to run Sobel Y ? */
};
}
#endif /*__ARM_COMPUTE_CLSOBEL3X3KERNEL_H__ */
/*
 * Copyright (c) 2016, 2017 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef __ARM_COMPUTE_CLSOBEL5X5KERNEL_H__
#define __ARM_COMPUTE_CLSOBEL5X5KERNEL_H__

#include "arm_compute/core/CL/ICLKernel.h"

namespace arm_compute
{
class ICLTensor;

/** Interface for the kernel to run the horizontal pass of 5x5 Sobel filter on a tensor. */
class CLSobel5x5HorKernel : public ICLKernel
{
public:
    /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */
    CLSobel5x5HorKernel();
    /** Prevent instances of this class from being copied (As this class contains pointers). */
    CLSobel5x5HorKernel(const CLSobel5x5HorKernel &) = delete;
    /** Prevent instances of this class from being copied (As this class contains pointers). */
    CLSobel5x5HorKernel &operator=(const CLSobel5x5HorKernel &) = delete;
    /** Allow instances of this class to be moved */
    CLSobel5x5HorKernel(CLSobel5x5HorKernel &&) = default;
    /** Allow instances of this class to be moved */
    CLSobel5x5HorKernel &operator=(CLSobel5x5HorKernel &&) = default;
    /** Default destructor */
    ~CLSobel5x5HorKernel() = default;

    /** Initialise the kernel's source, destination and border.
     *
     * @note At least one of output_x or output_y must be set.
     *
     * @param[in]  input            Source tensor. Data types supported: U8.
     * @param[out] output_x         (Optional) Destination tensor for the X gradient, Data types supported: S16.
     * @param[out] output_y         (Optional) Destination tensor for the Y gradient, Data types supported: S16.
     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
     */
    void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);

    // Inherited methods overridden:
    void run(const Window &window, cl::CommandQueue &queue) override;
    BorderSize border_size() const override;

private:
    const ICLTensor *_input;       /**< Input tensor */
    ICLTensor       *_output_x;    /**< X output of horizontal pass */
    ICLTensor       *_output_y;    /**< Y output of horizontal pass */
    bool             _run_sobel_x; /**< Do we need to run Sobel X ? */
    bool             _run_sobel_y; /**< Do we need to run Sobel Y ? */
    BorderSize       _border_size; /**< Border size */
};

/** Interface for the kernel to run the vertical pass of 5x5 Sobel filter on a tensor. */
class CLSobel5x5VertKernel : public ICLKernel
{
public:
    /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */
    CLSobel5x5VertKernel();
    /** Prevent instances of this class from being copied (As this class contains pointers). */
    CLSobel5x5VertKernel(const CLSobel5x5VertKernel &) = delete;
    /** Prevent instances of this class from being copied (As this class contains pointers). */
    CLSobel5x5VertKernel &operator=(const CLSobel5x5VertKernel &) = delete;
    /** Allow instances of this class to be moved */
    CLSobel5x5VertKernel(CLSobel5x5VertKernel &&) = default;
    /** Allow instances of this class to be moved */
    CLSobel5x5VertKernel &operator=(CLSobel5x5VertKernel &&) = default;
    /** Default destructor */
    ~CLSobel5x5VertKernel() = default;

    /** Initialise the kernel's source, destination and border.
     *
     * @note At least one of output_x or output_y must be set and the corresponding input.
     *
     * @param[in]  input_x          (Optional) Input for X (X output of horizontal pass). Data types supported: S16.
     * @param[in]  input_y          (Optional) Input for Y (Y output of horizontal pass). Data types supported: S16.
     * @param[out] output_x         (Optional) Destination tensor for the X gradient, Data types supported: S16.
     * @param[out] output_y         (Optional) Destination tensor for the Y gradient, Data types supported: S16.
     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
     */
    void configure(const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);

    // Inherited methods overridden:
    void run(const Window &window, cl::CommandQueue &queue) override;
    BorderSize border_size() const override;

private:
    const ICLTensor *_input_x;     /**< X input (X output of the horizontal pass) */
    const ICLTensor *_input_y;     /**< Y input (Y output of the horizontal pass) */
    ICLTensor       *_output_x;    /**< X output of sobel */
    ICLTensor       *_output_y;    /**< Y output of sobel */
    bool             _run_sobel_x; /**< Do we need to run sobel X? */
    bool             _run_sobel_y; /**< Do we need to run sobel Y? */
};
}
#endif /*__ARM_COMPUTE_CLSOBEL5X5KERNEL_H__ */
/*
 * Copyright (c) 2016, 2017 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef __ARM_COMPUTE_CLSOBEL7X7KERNEL_H__
#define __ARM_COMPUTE_CLSOBEL7X7KERNEL_H__

#include "arm_compute/core/CL/ICLKernel.h"

namespace arm_compute
{
class ICLTensor;

/** Interface for the kernel to run the horizontal pass of 7x7 Sobel filter on a tensor. */
class CLSobel7x7HorKernel : public ICLKernel
{
public:
    /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */
    CLSobel7x7HorKernel();
    /** Prevent instances of this class from being copied (As this class contains pointers). */
    CLSobel7x7HorKernel(const CLSobel7x7HorKernel &) = delete;
    /** Prevent instances of this class from being copied (As this class contains pointers). */
    CLSobel7x7HorKernel &operator=(const CLSobel7x7HorKernel &) = delete;
    /** Allow instances of this class to be moved */
    CLSobel7x7HorKernel(CLSobel7x7HorKernel &&) = default;
    /** Allow instances of this class to be moved */
    CLSobel7x7HorKernel &operator=(CLSobel7x7HorKernel &&) = default;
    /** Default destructor */
    ~CLSobel7x7HorKernel() = default;

    /** Initialise the kernel's source, destination and border.
     *
     * @note At least one of output_x or output_y must be set.
     *
     * @param[in]  input            Source tensor. Data types supported: U8.
     * @param[out] output_x         (Optional) Destination tensor for the X gradient, Data types supported: S32.
     * @param[out] output_y         (Optional) Destination tensor for the Y gradient, Data types supported: S32.
     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
     */
    void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);

    // Inherited methods overridden:
    void run(const Window &window, cl::CommandQueue &queue) override;
    BorderSize border_size() const override;

private:
    const ICLTensor *_input;       /**< Input tensor */
    ICLTensor       *_output_x;    /**< X output of horizontal pass */
    ICLTensor       *_output_y;    /**< Y output of horizontal pass */
    bool             _run_sobel_x; /**< Do we need to run Sobel X ? */
    bool             _run_sobel_y; /**< Do we need to run Sobel Y ? */
    BorderSize       _border_size; /**< Border size */
};

/** Interface for the kernel to run the vertical pass of 7x7 Sobel filter on a tensor. */
class CLSobel7x7VertKernel : public ICLKernel
{
public:
    /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */
    CLSobel7x7VertKernel();
    /** Prevent instances of this class from being copied (As this class contains pointers). */
    CLSobel7x7VertKernel(const CLSobel7x7VertKernel &) = delete;
    /** Prevent instances of this class from being copied (As this class contains pointers). */
    CLSobel7x7VertKernel &operator=(const CLSobel7x7VertKernel &) = delete;
    /** Allow instances of this class to be moved */
    CLSobel7x7VertKernel(CLSobel7x7VertKernel &&) = default;
    /** Allow instances of this class to be moved */
    CLSobel7x7VertKernel &operator=(CLSobel7x7VertKernel &&) = default;
    /** Default destructor */
    ~CLSobel7x7VertKernel() = default;

    /** Initialise the kernel's source, destination and border.
     *
     * @note At least one of output_x or output_y must be set and the corresponding input.
     *
     * @param[in]  input_x          (Optional) Input for X (X output of horizontal pass). Data types supported: S32.
     * @param[in]  input_y          (Optional) Input for Y (Y output of horizontal pass). Data types supported: S32.
     * @param[out] output_x         (Optional) Destination tensor for the X gradient, Data types supported: S32.
     * @param[out] output_y         (Optional) Destination tensor for the Y gradient, Data types supported: S32.
     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
     */
    void configure(const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);

    // Inherited methods overridden:
    void run(const Window &window, cl::CommandQueue &queue) override;
    BorderSize border_size() const override;

private:
    const ICLTensor *_input_x;     /**< X input (X output of the horizontal pass) */
    const ICLTensor *_input_y;     /**< Y input (Y output of the horizontal pass) */
    ICLTensor       *_output_x;    /**< X output of sobel */
    ICLTensor       *_output_y;    /**< Y output of sobel */
    bool             _run_sobel_x; /**< Do we need to run sobel X? */
    bool             _run_sobel_y; /**< Do we need to run sobel Y? */
};
}
#endif /*__ARM_COMPUTE_CLSOBEL7X7KERNEL_H__ */
/*
 * Copyright (c) 2017 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef __ARM_COMPUTE_CLSOFTMAXLAYERKERNEL_H__
#define __ARM_COMPUTE_CLSOFTMAXLAYERKERNEL_H__

#include "arm_compute/core/CL/ICLSimple2DKernel.h"

namespace arm_compute
{
class ICLTensor;

/** Interface for identifying the max value of 1D Logits */
class CLLogits1DMaxKernel : public ICLSimple2DKernel
{
public:
    /** Set the input and output tensors.
     *
     * @param[in]  input  Source tensor. Data types supported: F16, F32. Number of channels must be 1.
     * @param[out] output Destination tensor. Matching input type and channel number.
     */
    void configure(const ICLTensor *input, ICLTensor *output);
};

/** Interface for shifting the logits values around the max value and exponentiating the result */
class CLLogits1DShiftExpSumKernel : public ICLKernel
{
public:
    /** Default constructor */
    CLLogits1DShiftExpSumKernel();
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    CLLogits1DShiftExpSumKernel(const CLLogits1DShiftExpSumKernel &) = delete;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    CLLogits1DShiftExpSumKernel &operator=(const CLLogits1DShiftExpSumKernel &) = delete;
    /** Allow instances of this class to be moved */
    CLLogits1DShiftExpSumKernel(CLLogits1DShiftExpSumKernel &&) = default;
    /** Allow instances of this class to be moved */
    CLLogits1DShiftExpSumKernel &operator=(CLLogits1DShiftExpSumKernel &&) = default;
    /** Set the input and output tensors.
     *
     * @param[in]  input  Source tensor. Data types supported: F16, F32. Number of channels must be 1.
     * @param[in]  max    Max values tensor. Matching input type and channel number.
     * @param[out] output Destination tensor. Matching input type and channel number.
     * @param[out] sum    Sum of 1D logits tensor. Matching input type and channel number.
     */
    void configure(const ICLTensor *input, const ICLTensor *max, ICLTensor *output, ICLTensor *sum);

    // Inherited methods overridden:
    void run(const Window &window, cl::CommandQueue &queue) override;

private:
    const ICLTensor *_input;  /**< Source tensor */
    const ICLTensor *_max;    /**< Max values tensor */
    ICLTensor       *_output; /**< Destination tensor */
    ICLTensor       *_sum;    /**< Sum of the 1D logits */
};

/** Interface for calculating the final step of the Softmax Layer where each logit value is multiplied by the inverse of the sum of the logits. */
class CLLogits1DNormKernel : public ICLKernel
{
public:
    /** Default constructor */
    CLLogits1DNormKernel();
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    CLLogits1DNormKernel(const CLLogits1DNormKernel &) = delete;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    CLLogits1DNormKernel &operator=(const CLLogits1DNormKernel &) = delete;
    /** Allow instances of this class to be moved */
    CLLogits1DNormKernel(CLLogits1DNormKernel &&) = default;
    /** Allow instances of this class to be moved */
    CLLogits1DNormKernel &operator=(CLLogits1DNormKernel &&) = default;
    /** Set the input and output tensors.
     *
     * @param[in]  input  Source tensor. Data types supported: F16, F32. Number of channels must be 1.
     * @param[in]  sum    Sum tensor. Dimensions should be dim(input)-1. Matching input type and channel number.
     * @param[out] output Destination tensor. Matching input type and channel number.
     */
    void configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output);

    // Inherited methods overridden:
    void run(const Window &window, cl::CommandQueue &queue) override;

private:
    const ICLTensor *_input;  /**< Source tensor */
    const ICLTensor *_sum;    /**< Sum tensor */
    ICLTensor       *_output; /**< Destination tensor */
};
}
#endif /*__ARM_COMPUTE_CLSOFTMAXLAYERKERNEL_H__ */
/*
 * Copyright (c) 2016, 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLTABLELOOKUPKERNEL_H__ +#define __ARM_COMPUTE_CLTABLELOOKUPKERNEL_H__ + +#include "arm_compute/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; +class ICLLut; + +/** Interface for the kernel to perform table lookup calculations. */ +class CLTableLookupKernel : public ICLSimple2DKernel +{ +public: + /** Initialise the kernel's input, lut and output. + * + * @param[in] input An input tensor. Data types supported: U8, S16. + * @param[in] lut The input LUT. Data types supported: U8, S16. + * @param[out] output The output tensor. Data types supported: U8, S16. 
+ */ + void configure(const ICLTensor *input, const ICLLut *lut, ICLTensor *output); +}; +} +#endif /* __ARM_COMPUTE_CLTABLELOOKUPKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLThresholdKernel.h b/arm_compute/core/CL/kernels/CLThresholdKernel.h new file mode 100644 index 0000000000..d7a6ae2cdb --- /dev/null +++ b/arm_compute/core/CL/kernels/CLThresholdKernel.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLTHRESHOLDKERNEL_H__ +#define __ARM_COMPUTE_CLTHRESHOLDKERNEL_H__ + +#include "arm_compute/core/CL/ICLSimple2DKernel.h" +#include "arm_compute/core/Types.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the thresholding kernel. + * + */ +class CLThresholdKernel : public ICLSimple2DKernel +{ +public: + /**Initialise the kernel's input, output and threshold parameters. 
+ * + * @param[in] input An input tensor. Data types supported: U8 + * @param[out] output The output tensor. Data types supported: U8. + * @param[in] threshold Threshold. When the threshold type is RANGE, this is used as the lower threshold. + * @param[in] false_value value to set when the condition is not respected. + * @param[in] true_value value to set when the condition is respected. + * @param[in] type Thresholding type. Either RANGE or BINARY. + * @param[in] upper Upper threshold. Only used when the thresholding type is RANGE. + */ + void configure(const ICLTensor *input, ICLTensor *output, uint8_t threshold, + uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper); +}; +} +#endif /*__ARM_COMPUTE_CLTHRESHOLDKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLTransposeKernel.h b/arm_compute/core/CL/kernels/CLTransposeKernel.h new file mode 100644 index 0000000000..9ad183f8f1 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLTransposeKernel.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLTRANSPOSEKERNEL_H__ +#define __ARM_COMPUTE_CLTRANSPOSEKERNEL_H__ + +#include "arm_compute/core/CL/ICLSimple2DKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** OpenCL kernel which transposes the elements of a matrix. + * + * [width, height, batch] -> [height, width, batch] + * + */ +class CLTransposeKernel : public ICLSimple2DKernel +{ +public: + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data types supported: U8/S8/U16/S16/F16/U32/S32/F32 + * @param[out] output Output tensor. Data type supported: Same as @p input + */ + void configure(const ICLTensor *input, ICLTensor *output); +}; +} +#endif /* __ARM_COMPUTE_CLTRANSPOSEKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLWarpAffineKernel.h b/arm_compute/core/CL/kernels/CLWarpAffineKernel.h new file mode 100644 index 0000000000..05d6d0a8f7 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLWarpAffineKernel.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLWARPAFFINEKERNEL_H__ +#define __ARM_COMPUTE_CLWARPAFFINEKERNEL_H__ + +#include "arm_compute/core/CL/ICLSimple2DKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the warp affine kernel.*/ +class CLWarpAffineKernel : public ICLSimple2DKernel +{ +public: + /** Initialize the function's source, destination, interpolation policy and border_mode. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor, Data types supported: U8. + * @param[in] matrix The affine matrix. Must be 2x3 of type float. + * @param[in] policy The interpolation type. + */ + void configure(const ICLTensor *input, ICLTensor *output, const float *matrix, InterpolationPolicy policy); + + // Inherited methods overridden: + BorderSize border_size() const override; +}; +} +#endif /*__ARM_COMPUTE_CLWARPAFFINEKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h b/arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h new file mode 100644 index 0000000000..5c5013c599 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLWARPERSPECTIVEKERNEL_H__ +#define __ARM_COMPUTE_CLWARPERSPECTIVEKERNEL_H__ + +#include "arm_compute/core/CL/ICLSimple2DKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ICLTensor; +/** Interface for the warp perspective kernel.*/ +class CLWarpPerspectiveKernel : public ICLSimple2DKernel +{ +public: + /** Initialize the function's source, destination, interpolation policy and border_mode. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor, Data types supported: U8. + * @param[in] matrix The perspective matrix. Must be 3x3 of type float. + * @param[in] policy The interpolation type. 
+ */ + void configure(const ICLTensor *input, ICLTensor *output, const float *matrix, InterpolationPolicy policy); + + // Inherited methods overridden: + BorderSize border_size() const override; +}; +} + +#endif /*__ARM_COMPUTE_CLWARPERSPECTIVEKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h b/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h new file mode 100644 index 0000000000..1dc8a8b80e --- /dev/null +++ b/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLWEIGHTSRESHAPEKERNEL_H__ +#define __ARM_COMPUTE_CLWEIGHTSRESHAPEKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class CLWeightsReshapeKernel : public ICLKernel +{ +public: + /** Constructor. 
+ * + * @param[in] is_shared Flag to indicate whether the weights are shared or not. + */ + CLWeightsReshapeKernel(bool is_shared = false); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLWeightsReshapeKernel(const CLWeightsReshapeKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLWeightsReshapeKernel &operator=(const CLWeightsReshapeKernel &) = delete; + /** Allow instances of this class to be moved */ + CLWeightsReshapeKernel(CLWeightsReshapeKernel &&) = default; + /** Allow instances of this class to be moved */ + CLWeightsReshapeKernel &operator=(CLWeightsReshapeKernel &&) = default; + /** Default destructor */ + ~CLWeightsReshapeKernel() = default; + + /** Set the input and output of the kernel. + * + * @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared, + * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. Data types supported: F16, F32 + * @param[in] biases The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with + * dimensions [OFM, num_patches] if unshared. Data types supported: Same as @p input + * @param[out] output The output tensor. Should be a 2D Tensor. Data types supported: Same as @p input + */ + void configure(const ICLTensor *input, const ICLTensor *biases, ICLTensor *output); + + // Inherited methods overridden: + virtual void run(const Window &window, cl::CommandQueue &queue) = 0; + +protected: + bool _is_shared; + const ICLTensor *_input; + const ICLTensor *_biases; + ICLTensor *_output; +}; + +/** Interface for the weights reshape kernel used by convolution and fully connected layers. + * + * Rearranges each 3-dimensional kernel to a single row leading to a matrix with linearized kernels. 
+ * In combination with the @ref CLIm2ColKernel can transform a convolution into a matrix multiplication. + * + * For example assuming a 3D weight kernel of 3x3 dimensions and depth of 2 we have: + * @f[ + * \left( \begin{array}{ccc} + * a000 & a001 & a002 \\ + * a010 & a011 & a012 \\ + * a020 & a021 & a022 \\ + * \end{array} \right) + * \left( \begin{array}{ccc} + * a100 & a101 & a102 \\ + * a110 & a111 & a112 \\ + * a120 & a121 & a122 \\ + * \end{array} \right) + * \rightarrow + * \left( \begin{array}{ccccccccc} + * a000 & a001 & a002 & a010 & a011 & a012 & a020 & a021 & a022 & a100 & a101 & a102 & a110 & a111 & a112 & a120 & a121 & a122 \\ + * \end{array} \right) + * @f] + */ +class CLConvolutionLayerWeightsReshapeKernel : public CLWeightsReshapeKernel +{ +public: + /** Default constructor */ + CLConvolutionLayerWeightsReshapeKernel(); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; +}; + +/** Interface for the weights reshape kernel used by locally connected layers. */ +class CLLocallyConnectedLayerWeightsReshapeKernel : public CLWeightsReshapeKernel +{ +public: + /** Default constructor */ + CLLocallyConnectedLayerWeightsReshapeKernel(); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; +}; +} +#endif /*__ARM_COMPUTE_CLWEIGHTSRESHAPEKERNEL_H__ */ diff --git a/arm_compute/core/CPP/CPPKernels.h b/arm_compute/core/CPP/CPPKernels.h new file mode 100644 index 0000000000..1eabfa9437 --- /dev/null +++ b/arm_compute/core/CPP/CPPKernels.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CPPKERNELS_H__ +#define __ARM_COMPUTE_CPPKERNELS_H__ + +/* Header regrouping all the CPP kernels */ +#include "arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h" +#include "arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h" +#include "arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h" + +#endif /* __ARM_COMPUTE_CPPKERNELS_H__ */ diff --git a/arm_compute/core/CPP/ICPPKernel.h b/arm_compute/core/CPP/ICPPKernel.h new file mode 100644 index 0000000000..99ae68f2e5 --- /dev/null +++ b/arm_compute/core/CPP/ICPPKernel.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_ICPPKERNEL_H__ +#define __ARM_COMPUTE_ICPPKERNEL_H__ + +#include "arm_compute/core/IKernel.h" + +namespace arm_compute +{ +class Window; + +/** Common interface for all kernels implemented in C++ */ +class ICPPKernel : public IKernel +{ +public: + /** Default destructor */ + virtual ~ICPPKernel() = default; + + /** Execute the kernel on the passed window + * + * @warning If is_parallelisable() returns false then the passed window must be equal to window() + * + * @note The window has to be a region within the window returned by the window() method + * + * @note The width of the window has to be a multiple of num_elems_processed_per_iteration(). + * + * @param[in] window Region on which to execute the kernel. 
(Must be a region of the window returned by window()) + */ + virtual void run(const Window &window) = 0; +}; +} +#endif /*__ARM_COMPUTE_ICPPKERNEL_H__ */ diff --git a/arm_compute/core/CPP/ICPPSimpleKernel.h b/arm_compute/core/CPP/ICPPSimpleKernel.h new file mode 100644 index 0000000000..105de397a2 --- /dev/null +++ b/arm_compute/core/CPP/ICPPSimpleKernel.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_ICPPSIMPLEKERNEL_H__ +#define __ARM_COMPUTE_ICPPSIMPLEKERNEL_H__ + +#include "arm_compute/core/CPP/ICPPKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for simple NEON kernels having 1 tensor input and 1 tensor output */ +class ICPPSimpleKernel : public ICPPKernel +{ +public: + /** Constructor */ + ICPPSimpleKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + ICPPSimpleKernel(const ICPPSimpleKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + ICPPSimpleKernel &operator=(const ICPPSimpleKernel &) = delete; + /** Allow instances of this class to be moved */ + ICPPSimpleKernel(ICPPSimpleKernel &&) = default; + /** Allow instances of this class to be moved */ + ICPPSimpleKernel &operator=(ICPPSimpleKernel &&) = default; + /** Default destructor */ + ~ICPPSimpleKernel() = default; + +protected: + /** Configure the kernel + * + * @param[in] input Source tensor. + * @param[out] output Destination tensor. + * @param[in] num_elems_processed_per_iteration Number of processed elements per iteration. + * @param[in] border_undefined (Optional) True if the border mode is undefined. False if it's replicate or constant. + * @param[in] border_size (Optional) Size of the border. + */ + void configure(const ITensor *input, ITensor *output, unsigned int num_elems_processed_per_iteration, bool border_undefined = false, const BorderSize &border_size = BorderSize()); + +protected: + const ITensor *_input; + ITensor *_output; +}; +} +#endif /*__ARM_COMPUTE_ICPPSIMPLEKERNEL_H__ */ diff --git a/arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h b/arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h new file mode 100644 index 0000000000..0866d4ee57 --- /dev/null +++ b/arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CPPCORNERCANDIDATESKERNEL_H__ +#define __ARM_COMPUTE_CPPCORNERCANDIDATESKERNEL_H__ + +#include "arm_compute/core/IArray.h" +#include "arm_compute/core/NEON/INEKernel.h" + +#include +#include + +namespace arm_compute +{ +class ITensor; +using IImage = ITensor; + +/** CPP kernel to perform corner candidates + */ +class CPPCornerCandidatesKernel : public INEKernel +{ +public: + /** Default constructor */ + CPPCornerCandidatesKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CPPCornerCandidatesKernel(const CPPCornerCandidatesKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CPPCornerCandidatesKernel &operator=(const CPPCornerCandidatesKernel &) = delete; + /** Allow instances of this class to be moved */ + CPPCornerCandidatesKernel(CPPCornerCandidatesKernel &&) = default; + /** Allow instances of this class to be moved */ + CPPCornerCandidatesKernel &operator=(CPPCornerCandidatesKernel &&) = default; + /** Default destructor */ + ~CPPCornerCandidatesKernel() = default; + + /** Setup the kernel parameters + * + * @param[in] input Source image (harris score). 
Format supported F32 + * @param[out] output Destination array of InternalKeypoint + * @param[out] num_corner_candidates Number of corner candidates + */ + void configure(const IImage *input, InternalKeypoint *output, int32_t *num_corner_candidates); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + int32_t *_num_corner_candidates; /**< Number of corner candidates */ + std::mutex _corner_candidates_mutex; /**< Mutex to prevent race conditions */ + const IImage *_input; /**< Source image - Harris score */ + InternalKeypoint *_output; /**< Array of NEInternalKeypoint */ +}; +} //namespace arm_compute +#endif /* __ARM_COMPUTE_CPPCORNERCANDIDATESKERNEL_H__ */ diff --git a/arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h b/arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h new file mode 100644 index 0000000000..bcb3026959 --- /dev/null +++ b/arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CPPDETECTIONWINDOWNONMAXIMASUPPRESSIONKERNEL_H__ +#define __ARM_COMPUTE_CPPDETECTIONWINDOWNONMAXIMASUPPRESSIONKERNEL_H__ + +#include "arm_compute/core/IArray.h" +#include "arm_compute/core/IHOG.h" +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +/** CPP kernel to perform in-place computation of euclidean distance on IDetectionWindowArray + * + * @note This kernel is meant to be used alongside HOG or other object detection algorithms to perform a non-maxima suppression on a + * IDetectionWindowArray + */ +class CPPDetectionWindowNonMaximaSuppressionKernel : public ICPPKernel +{ +public: + /** Default constructor */ + CPPDetectionWindowNonMaximaSuppressionKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CPPDetectionWindowNonMaximaSuppressionKernel(const CPPDetectionWindowNonMaximaSuppressionKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CPPDetectionWindowNonMaximaSuppressionKernel &operator=(const CPPDetectionWindowNonMaximaSuppressionKernel &) = delete; + /** Allow instances of this class to be moved */ + CPPDetectionWindowNonMaximaSuppressionKernel(CPPDetectionWindowNonMaximaSuppressionKernel &&) = default; + /** Allow instances of this class to be moved */ + CPPDetectionWindowNonMaximaSuppressionKernel &operator=(CPPDetectionWindowNonMaximaSuppressionKernel &&) = default; + /** Initialise the kernel's input, output and the euclidean minimum distance + * + * @attention: If @ref CLDetectionWindowArray is passed to the kernel, the map() and unmap() methods @ref CLDetectionWindowArray 
must be called respectively before and after + * the run() method of @ref CPPDetectionWindowNonMaximaSuppressionKernel + * + * @param[in, out] input_output Input/Output array of @ref DetectionWindow + * @param[in] min_distance Radial Euclidean distance for non-maxima suppression + */ + void configure(IDetectionWindowArray *input_output, float min_distance); + + // Inherited methods overridden: + void run(const Window &window) override; + bool is_parallelisable() const override; + +private: + IDetectionWindowArray *_input_output; + float _min_distance; +}; +} + +#endif /* __ARM_COMPUTE_CPPDETECTIONWINDOWNONMAXIMASUPPRESSIONKERNEL_H__ */ diff --git a/arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h b/arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h new file mode 100644 index 0000000000..b7a7d9ff9f --- /dev/null +++ b/arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CPPSORTEUCLIDEANDISTANCEKERNEL_H__ +#define __ARM_COMPUTE_CPPSORTEUCLIDEANDISTANCEKERNEL_H__ + +#include "arm_compute/core/CPP/ICPPKernel.h" +#include "arm_compute/core/IArray.h" + +#include +#include + +namespace arm_compute +{ +/** CPP kernel to perform sorting and euclidean distance */ +class CPPSortEuclideanDistanceKernel : public ICPPKernel +{ +public: + /** Default constructor */ + CPPSortEuclideanDistanceKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CPPSortEuclideanDistanceKernel(const CPPSortEuclideanDistanceKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CPPSortEuclideanDistanceKernel &operator=(const CPPSortEuclideanDistanceKernel &) = delete; + /** Allow instances of this class to be moved */ + CPPSortEuclideanDistanceKernel(CPPSortEuclideanDistanceKernel &&) = default; + /** Allow instances of this class to be moved */ + CPPSortEuclideanDistanceKernel &operator=(CPPSortEuclideanDistanceKernel &&) = default; + /** Initialise the kernel's source, destination and border mode. + * + * @param[in,out] in_out Input internal keypoints. Marked as out as the kernel writes 0 in the strength member. + * @param[out] output Output keypoints. 
+ * @param[in] num_corner_candidates Pointer to the number of corner candidates in the input array + * @param[in] min_distance Radial Euclidean distance to use + */ + void configure(InternalKeypoint *in_out, IKeyPointArray *output, const int32_t *num_corner_candidates, float min_distance); + + // Inherited methods overridden: + void run(const Window &window) override; + bool is_parallelisable() const override; + +private: + const int32_t *_num_corner_candidates; /**< Number of corner candidates */ + float _min_distance; /**< Radial Euclidean distance */ + InternalKeypoint *_in_out; /**< Source array of InternalKeypoint */ + IKeyPointArray *_output; /**< Destination array of IKeyPointArray */ +}; + +} // namespace arm_compute +#endif /* __ARM_COMPUTE_CPPSORTEUCLIDEANDISTANCEKERNEL_H__ */ diff --git a/arm_compute/core/Coordinates.h b/arm_compute/core/Coordinates.h new file mode 100644 index 0000000000..3a99abbd74 --- /dev/null +++ b/arm_compute/core/Coordinates.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_COORDINATES_H__ +#define __ARM_COMPUTE_COORDINATES_H__ + +#include "arm_compute/core/Dimensions.h" +#include "arm_compute/core/Error.h" + +#include +#include +#include + +namespace arm_compute +{ +/** Coordinates of an item */ +class Coordinates : public Dimensions +{ +public: + /** Constructor to initialize the coordinates. + * + * @param[in] coords Values to initialize the dimensions. + */ + template + constexpr Coordinates(Ts... coords) + : Dimensions{ coords... } + { + } + /** Allow instances of this class to be copy constructed */ + constexpr Coordinates(const Coordinates &) = default; + /** Allow instances of this class to be copied */ + Coordinates &operator=(const Coordinates &) = default; + /** Allow instances of this class to be move constructed */ + constexpr Coordinates(Coordinates &&) = default; + /** Allow instances of this class to be moved */ + Coordinates &operator=(Coordinates &&) = default; + /** Default destructor */ + ~Coordinates() = default; +}; +} +#endif /*__ARM_COMPUTE_COORDINATES_H__*/ diff --git a/arm_compute/core/Dimensions.h b/arm_compute/core/Dimensions.h new file mode 100644 index 0000000000..b080435b69 --- /dev/null +++ b/arm_compute/core/Dimensions.h @@ -0,0 +1,178 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_DIMENSIONS_H__ +#define __ARM_COMPUTE_DIMENSIONS_H__ + +#include "arm_compute/core/Error.h" + +#include +#include +#include +#include + +namespace arm_compute +{ +/* Constant value used to indicate maximum dimensions of a Window, TensorShape and Coordinates */ +constexpr size_t MAX_DIMS = 6; + +/** Dimensions with dimensionality */ +template +class Dimensions +{ +public: + /** Number of dimensions the tensor has */ + static constexpr size_t num_max_dimensions = MAX_DIMS; + + /** Constructor to initialize the tensor shape. + * + * @param[in] dims Values to initialize the dimensions. + */ + template + Dimensions(Ts... dims) + : _id{ { dims... 
} }, _num_dimensions{ sizeof...(dims) } + { + } + + /** Allow instances of this class to be copy constructed */ + Dimensions(const Dimensions &) = default; + + /** Allow instances of this class to be copied */ + Dimensions &operator=(const Dimensions &) = default; + + /** Allow instances of this class to be move constructed */ + Dimensions(Dimensions &&) = default; + + /** Allow instances of this class to be moved */ + Dimensions &operator=(Dimensions &&) = default; + + /** Accessor to set the value of one of the dimensions. + * + * @param[in] dimension Dimension for which the value is set. + * @param[in] value Value to be set for the dimension. + */ + void set(size_t dimension, T value) + { + ARM_COMPUTE_ERROR_ON(dimension >= num_max_dimensions); + _id[dimension] = value; + _num_dimensions = std::max(_num_dimensions, dimension + 1); + } + /** Alias to access the size of the first dimension */ + T x() const + { + return _id[0]; + } + /** Alias to access the size of the second dimension */ + T y() const + { + return _id[1]; + } + /** Alias to access the size of the third dimension */ + T z() const + { + return _id[2]; + } + /** Generic accessor to get the size of any dimension + * + * @note Precondition: dimension < Dimensions::num_max_dimensions + * + * @param[in] dimension Dimension of the wanted size + * + * @return The size of the requested dimension. + */ + T operator[](size_t dimension) const + { + ARM_COMPUTE_ERROR_ON(dimension >= num_max_dimensions); + return _id[dimension]; + } + /** Returns the effective dimensionality of the tensor */ + unsigned int num_dimensions() const + { + return _num_dimensions; + } + + /** Set number of dimensions */ + void set_num_dimensions(size_t num_dimensions) + { + _num_dimensions = num_dimensions; + } + + /** Collapse dimensions. + * + * @param[in] first Dimensions into which the following @p n are collapsed. + * @param[in] n Number of dimensions to collapse into @p first. 
+ */ + void collapse(size_t n, size_t first = 0) + { + ARM_COMPUTE_ERROR_ON(first + n > _id.size()); + + // Collapse dimensions into the first + _id[first] = std::accumulate(_id.cbegin() + first, _id.cbegin() + first + n, 1, std::multiplies()); + // Shift the remaining dimensions down + std::copy(_id.begin() + first + n, _id.end(), _id.begin() + first + 1); + // Reduce the number of dimensions + _num_dimensions -= n - 1; + // Fill the now empty dimensions with zero + std::fill(_id.begin() + _num_dimensions, _id.end(), 0); + } + + /** Returns a read/write iterator that points to the first element in the dimension array. */ + typename std::array::iterator begin() + { + return _id.begin(); + } + /** Returns a read-only (constant) iterator that points to the first element in the dimension array. */ + typename std::array::const_iterator begin() const + { + return _id.begin(); + } + /** Returns a read-only (constant) iterator that points to the first element in the dimension array. */ + typename std::array::const_iterator cbegin() const + { + return begin(); + } + /** Returns a read/write iterator that points one past the last element in the dimension array. */ + typename std::array::iterator end() + { + return _id.end(); + } + /** Returns a read-only (constant) iterator that points one past the last element in the dimension array. */ + typename std::array::const_iterator end() const + { + return _id.end(); + } + /** Returns a read-only (constant) iterator that points one past the last element in the dimension array. */ + typename std::array::const_iterator cend() const + { + return end(); + } + +protected: + /** Protected destructor. 
*/ + ~Dimensions() = default; + + std::array _id; + size_t _num_dimensions{ 0 }; +}; +} +#endif /*__ARM_COMPUTE_DIMENSIONS_H__*/ diff --git a/arm_compute/core/Error.h b/arm_compute/core/Error.h new file mode 100644 index 0000000000..c4c452bacf --- /dev/null +++ b/arm_compute/core/Error.h @@ -0,0 +1,160 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_ERROR_H__ +#define __ARM_COMPUTE_ERROR_H__ + +/** Print the given message then throw an std::runtime_error. + * + * @param[in] ... Message to display before aborting. + */ +#define ARM_COMPUTE_ERROR(...) ::arm_compute::error(__func__, __FILE__, __LINE__, __VA_ARGS__) // NOLINT + +/** Print the given message then throw an std::runtime_error. + * + * @param[in] func Function in which the error occurred. + * @param[in] file File in which the error occurred. 
+ * @param[in] line Line in which the error occurred.
+ * @param[in] ... Message to display before aborting.
+ */
+#define ARM_COMPUTE_ERROR_LOC(func, file, line, ...) ::arm_compute::error(func, file, line, __VA_ARGS__) // NOLINT
+
+/** To avoid unused variables warnings
+ *
+ * This is useful if for example a variable is only used
+ * in debug builds and generates a warning in release builds.
+ *
+ * @param[in] var Variable which is unused
+ */
+#define ARM_COMPUTE_UNUSED(var) (void)(var)
+
+#ifdef ARM_COMPUTE_DEBUG_ENABLED
+/** Print the given message
+ *
+ * @param[in] ... Message to display
+ */
+#define ARM_COMPUTE_INFO(...) ::arm_compute::debug(__func__, __FILE__, __LINE__, __VA_ARGS__) // NOLINT
+/** If the condition is true, the given message is printed
+ *
+ * @param[in] cond Condition to evaluate.
+ * @param[in] ... Message to print if cond is true.
+ */
+#define ARM_COMPUTE_INFO_ON_MSG(cond, ...) \
+    do                                     \
+    {                                      \
+        if(cond)                           \
+        {                                  \
+            ARM_COMPUTE_INFO(__VA_ARGS__); \
+        }                                  \
+    } while(0)
+#else /* ARM_COMPUTE_DEBUG_ENABLED */
+#define ARM_COMPUTE_INFO_ON_MSG(cond, ...)
+#define ARM_COMPUTE_INFO(...)
+#endif /* ARM_COMPUTE_DEBUG_ENABLED */
+
+#ifdef ARM_COMPUTE_ASSERTS_ENABLED
+/** If the condition is true, the given message is printed and an exception is thrown
+ *
+ * @param[in] cond Condition to evaluate.
+ * @param[in] ... Message to print if cond is true.
+ */
+#define ARM_COMPUTE_ERROR_ON_MSG(cond, ...) \
+    do                                      \
+    {                                       \
+        if(cond)                            \
+        {                                   \
+            ARM_COMPUTE_ERROR(__VA_ARGS__); \
+        }                                   \
+    } while(0)
+
+/** If the condition is true, the given message is printed and an exception is thrown
+ *
+ * @param[in] cond Condition to evaluate.
+ * @param[in] func Function in which the error occurred.
+ * @param[in] file File in which the error occurred.
+ * @param[in] line Line in which the error occurred.
+ * @param[in] ... Message to print if cond is true.
+ */
+#define ARM_COMPUTE_ERROR_ON_LOC_MSG(cond, func, file, line, ...)     \
+    do                                                                \
+    {                                                                 \
+        if(cond)                                                      \
+        {                                                             \
+            ARM_COMPUTE_ERROR_LOC(func, file, line, __VA_ARGS__);     \
+        }                                                             \
+    } while(0)
+
+/** If the condition is true, the given message is printed and an exception is thrown, otherwise value is returned
+ *
+ * @param[in] cond Condition to evaluate.
+ * @param[in] val  Value to be returned.
+ * @param[in] msg  Message to print if cond is true.
+ */
+/* NOTE(review): the expansion below ends with a ';' (call sites must not add their own) and
+ * throws std::logic_error although this header does not include <stdexcept> — confirm call
+ * sites provide it, and consider fixing upstream. */
+#define ARM_COMPUTE_CONST_ON_ERROR(cond, val, msg) (cond) ? throw std::logic_error(msg) : val;
+#else /* ARM_COMPUTE_ASSERTS_ENABLED */
+#define ARM_COMPUTE_ERROR_ON_MSG(cond, ...)
+#define ARM_COMPUTE_ERROR_ON_LOC_MSG(cond, func, file, line, ...)
+#define ARM_COMPUTE_CONST_ON_ERROR(cond, val, msg) val
+#endif /* ARM_COMPUTE_ASSERTS_ENABLED */
+
+/** If the condition is true then an error message is printed and an exception thrown
+ *
+ * @param[in] cond Condition to evaluate
+ */
+#define ARM_COMPUTE_ERROR_ON(cond) \
+    ARM_COMPUTE_ERROR_ON_MSG(cond, #cond)
+
+/** If the condition is true then an error message is printed and an exception thrown
+ *
+ * @param[in] cond Condition to evaluate
+ * @param[in] func Function in which the error occurred.
+ * @param[in] file File in which the error occurred.
+ * @param[in] line Line in which the error occurred.
+ */
+#define ARM_COMPUTE_ERROR_ON_LOC(cond, func, file, line) \
+    ARM_COMPUTE_ERROR_ON_LOC_MSG(cond, func, file, line, #cond)
+
+namespace arm_compute
+{
+/** Print an error message then throw an std::runtime_error
+ *
+ * @param[in] function Function in which the error occurred.
+ * @param[in] file     Name of the file where the error occurred.
+ * @param[in] line     Line on which the error occurred.
+ * @param[in] msg      Message to display before aborting.
+ * @param[in] ...      Variable number of arguments of the message.
+ */
+[[noreturn]] void error(const char *function, const char *file, const int line, const char *msg, ...);
+
+/** Print a debug message
+ *
+ * @param[in] function Function in which the error occurred.
+ * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] msg Message to display before aborting. + * @param[in] ... Variable number of arguments of the message. + */ +void debug(const char *function, const char *file, const int line, const char *msg, ...); +} + +#endif /* __ARM_COMPUTE_ERROR_H__ */ diff --git a/arm_compute/core/FixedPoint.h b/arm_compute/core/FixedPoint.h new file mode 100644 index 0000000000..925b4949a3 --- /dev/null +++ b/arm_compute/core/FixedPoint.h @@ -0,0 +1,217 @@ +/* + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_FIXEDPOINT_H__ +#define __ARM_COMPUTE_FIXEDPOINT_H__ + +#include + +namespace arm_compute +{ +using qint8_t = int8_t; /**< 8 bit fixed point scalar value */ +using qint16_t = int16_t; /**< 16 bit fixed point scalar value */ +using qint32_t = int32_t; /**< 32 bit fixed point scalar value */ + +/** 8 bit fixed point scalar saturating shift left + * + * @param[in] a First 8 bit fixed point input + * @param[in] shift Shift amount + * + * @return The result of the 8 bit fixed point shift. The result is saturated in case of overflow + */ +qint8_t sqshl_qs8(qint8_t a, int shift); + +/** 8 bit fixed point scalar absolute value + * + * @param[in] a 8 bit fixed point input + * + * @return The result of the 8 bit fixed point absolute value + */ +qint8_t sabs_qs8(qint8_t a); + +/** 8 bit fixed point scalar add + * + * @param[in] a First 8 bit fixed point input + * @param[in] b Second 8 bit fixed point input + * + * @return The result of the 8 bit fixed point addition + */ +qint8_t sadd_qs8(qint8_t a, qint8_t b); + +/** 8 bit fixed point scalar saturating add + * + * @param[in] a First 8 bit fixed point input + * @param[in] b Second 8 bit fixed point input + * + * @return The result of the 8 bit fixed point addition. The result is saturated in case of overflow + */ +qint8_t sqadd_qs8(qint8_t a, qint8_t b); + +/** 16 bit fixed point scalar saturating add + * + * @param[in] a First 16 bit fixed point input + * @param[in] b Second 16 bit fixed point input + * + * @return The result of the 16 bit fixed point addition. 
The result is saturated in case of overflow + */ +qint16_t sqadd_qs16(qint16_t a, qint16_t b); + +/** 8 bit fixed point scalar subtraction + * + * @param[in] a First 8 bit fixed point input + * @param[in] b Second 8 bit fixed point input + * + * @return The result of the 8 bit fixed point subtraction + */ +qint8_t ssub_qs8(qint8_t a, qint8_t b); + +/** 8 bit fixed point scalar saturating subtraction + * + * @param[in] a First 8 bit fixed point input + * @param[in] b Second 8 bit fixed point input + * + * @return The result of the 8 bit fixed point subtraction. The result is saturated in case of overflow + */ +qint8_t sqsub_qs8(qint8_t a, qint8_t b); + +/** 8 bit fixed point scalar multiply + * + * @param[in] a First 8 bit fixed point input + * @param[in] b Second 8 bit fixed point input + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8 bit fixed point multiplication. + */ +qint8_t smul_qs8(qint8_t a, qint8_t b, int fixed_point_position); + +/** 8 bit fixed point scalar saturating multiply + * + * @param[in] a First 8 bit fixed point input + * @param[in] b Second 8 bit fixed point input + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8 bit fixed point multiplication. The result is saturated in case of overflow + */ +qint8_t sqmul_qs8(qint8_t a, qint8_t b, int fixed_point_position); + +/** 8 bit fixed point scalar multiply long + * + * @param[in] a First 8 bit fixed point input + * @param[in] b Second 8 bit fixed point input + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8 bit fixed point multiplication long. 
The result is saturated in case of overflow + */ +qint16_t sqmull_qs8(qint8_t a, qint8_t b, int fixed_point_position); + +/** 16 bit fixed point scalar saturating multiply +* +* @param[in] a First 16 bit fixed point input +* @param[in] b Second 16 bit fixed point input +* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number +* +* @return The result of the 16 bit fixed point multiplication. The result is saturated in case of overflow +*/ +qint16_t sqmul_qs16(qint16_t a, qint16_t b, int fixed_point_position); + +/** 8 bit fixed point scalar inverse square root +* +* @param[in] a 8 bit fixed point input +* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number +* +* @return The result of the 8 bit fixed point inverse square root. +*/ +qint8_t sinvsqrt_qs8(qint8_t a, int fixed_point_position); + +/** 8 bit fixed point scalar division +* +* @param[in] a First 8 bit fixed point input +* @param[in] b Second 8 bit fixed point input +* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number +* +* @return The result of the 8 bit fixed point division. +*/ +qint8_t sdiv_qs8(qint8_t a, qint8_t b, int fixed_point_position); + +/** 8 bit fixed point scalar exponential +* +* @param[in] a 8 bit fixed point input +* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number +* +* @return The result of the 8 bit fixed point exponential. +*/ +qint8_t sexp_qs8(qint8_t a, int fixed_point_position); + +/** 8 bit fixed point scalar logarithm +* +* @param[in] a 8 bit fixed point input +* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number +* +* @return The result of the 8 bit fixed point logarithm. 
+*/ +qint8_t slog_qs8(qint8_t a, int fixed_point_position); + +/** Convert an 8 bit fixed point to float + * + * @param[in] a Input to convert + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the conversion 8 bit fixed point -> float + */ +float scvt_f32_qs8(qint8_t a, int fixed_point_position); + +/** Convert a float to 8 bit fixed point + * + * @param[in] a Input to convert + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the conversion float -> 8 bit fixed point + */ +qint8_t scvt_qs8_f32(float a, int fixed_point_position); + +/** Convert a 16 bit fixed point to float + * + * @param[in] a Input to convert + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the conversion 16 bit fixed point -> float + */ +float scvt_f32_qs16(qint16_t a, int fixed_point_position); + +/** Convert a float to 16 bit fixed point + * + * @param[in] a Input to convert + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the conversion float -> 16 bit fixed point + */ +qint8_t scvt_qs16_f32(float a, int fixed_point_position); + +/** Scalar saturating move and narrow. + * + * @param[in] a Input to convert to 8 bit fixed point + * + * @return The narrowing conversion to 8 bit + */ +qint8_t sqmovn_qs16(qint16_t a); +} +#include "arm_compute/core/FixedPoint.inl" +#endif /* __ARM_COMPUTE_FIXEDPOINT_H__ */ diff --git a/arm_compute/core/FixedPoint.inl b/arm_compute/core/FixedPoint.inl new file mode 100644 index 0000000000..4263a6f00d --- /dev/null +++ b/arm_compute/core/FixedPoint.inl @@ -0,0 +1,252 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include +#include + +namespace +{ +template +inline TpSat saturate_convert(TpIn a) +{ + if(a > std::numeric_limits::max()) + { + a = std::numeric_limits::max(); + } + if(a < std::numeric_limits::min()) + { + a = std::numeric_limits::min(); + } + return static_cast(a); +} +} // namespace + +namespace arm_compute +{ +inline qint8_t sqshl_qs8(qint8_t a, int shift) +{ + qint16_t tmp = static_cast(a) << shift; + // Saturate the result in case of overflow and cast to qint8_t + return saturate_convert(tmp); +} + +inline qint8_t sabs_qs8(qint8_t a) +{ + return a & 0x7F; +} + +inline qint8_t sadd_qs8(qint8_t a, qint8_t b) +{ + return a + b; +} + +inline qint8_t sqadd_qs8(qint8_t a, qint8_t b) +{ + // We need to store the temporary result in qint16_t otherwise we cannot evaluate the overflow + qint16_t tmp = (static_cast(a) + static_cast(b)); + + // Saturate the result in case of overflow and cast to qint8_t + return saturate_convert(tmp); +} + +inline qint16_t sqadd_qs16(qint16_t a, qint16_t b) +{ + // We need to store the temporary result in qint16_t otherwise we cannot evaluate the overflow + qint32_t tmp = (static_cast(a) + static_cast(b)); + + // Saturate the result in case of overflow and cast to qint16_t + return saturate_convert(tmp); +} + +inline qint8_t ssub_qs8(qint8_t a, qint8_t b) +{ + return a - b; +} + +inline qint8_t sqsub_qs8(qint8_t a, qint8_t b) +{ + // We need to store the temporary result in uint16_t otherwise we cannot evaluate the overflow + qint16_t tmp = static_cast(a) - static_cast(b); + + // Saturate the result in case of overflow and cast to qint8_t + return saturate_convert(tmp); +} + +inline qint8_t smul_qs8(qint8_t a, qint8_t b, int fixed_point_position) +{ + const qint16_t round_up_const = (1 << (fixed_point_position - 1)); + + qint16_t tmp = static_cast(a) * static_cast(b); + + // Rounding up + tmp += round_up_const; + + return static_cast(tmp >> fixed_point_position); +} + +inline qint8_t sqmul_qs8(qint8_t a, qint8_t b, int 
fixed_point_position) +{ + const qint16_t round_up_const = (1 << (fixed_point_position - 1)); + + qint16_t tmp = static_cast(a) * static_cast(b); + + // Rounding up + tmp += round_up_const; + + return saturate_convert(tmp >> fixed_point_position); +} + +inline qint16_t sqmul_qs16(qint16_t a, qint16_t b, int fixed_point_position) +{ + const qint32_t round_up_const = (1 << (fixed_point_position - 1)); + + qint32_t tmp = static_cast(a) * static_cast(b); + + // Rounding up + tmp += round_up_const; + + return saturate_convert(tmp >> fixed_point_position); +} + +inline qint16_t sqmull_qs8(qint8_t a, qint8_t b, int fixed_point_position) +{ + const qint16_t round_up_const = (1 << (fixed_point_position - 1)); + + qint16_t tmp = static_cast(a) * static_cast(b); + + // Rounding up + tmp += round_up_const; + + return tmp >> fixed_point_position; +} + +inline qint8_t sinvsqrt_qs8(qint8_t a, int fixed_point_position) +{ + qint8_t shift = 8 - (fixed_point_position + (__builtin_clz(a) - 24)); + + qint8_t const_three = (3 << fixed_point_position); + qint8_t temp = shift < 0 ? (a << -shift) : (a >> shift); + qint8_t x2 = temp; + + // We need three iterations to find the result + for(int i = 0; i < 3; i++) + { + qint8_t three_minus_dx = ssub_qs8(const_three, smul_qs8(temp, smul_qs8(x2, x2, fixed_point_position), fixed_point_position)); + x2 = (smul_qs8(x2, three_minus_dx, fixed_point_position) >> 1); + } + + temp = shift < 0 ? 
(x2 << (-shift >> 1)) : (x2 >> (shift >> 1)); + + return temp; +} + +inline qint8_t sdiv_qs8(qint8_t a, qint8_t b, int fixed_point_position) +{ + qint16_t temp = a << fixed_point_position; + return (qint8_t)(temp / b); +} + +inline qint8_t sqexp_qs8(qint8_t a, int fixed_point_position) +{ + // Constants + qint8_t const_one = (1 << fixed_point_position); + qint8_t ln2 = ((0x58 >> (6 - fixed_point_position)) + 1) >> 1; + qint8_t inv_ln2 = (((0x38 >> (6 - fixed_point_position)) + 1) >> 1) | const_one; + qint8_t A = ((0x7F >> (6 - fixed_point_position)) + 1) >> 1; + qint8_t B = ((0x3F >> (6 - fixed_point_position)) + 1) >> 1; + qint8_t C = ((0x16 >> (6 - fixed_point_position)) + 1) >> 1; + qint8_t D = ((0x05 >> (6 - fixed_point_position)) + 1) >> 1; + + // Polynomial expansion + int dec_a = (sqmul_qs8(a, inv_ln2, fixed_point_position) >> fixed_point_position); + qint8_t alpha = sabs_qs8(sqsub_qs8(a, sqmul_qs8(ln2, sqshl_qs8(dec_a, fixed_point_position), fixed_point_position))); + qint8_t sum = sqadd_qs8(sqmul_qs8(alpha, D, fixed_point_position), C); + sum = sqadd_qs8(sqmul_qs8(alpha, sum, fixed_point_position), B); + sum = sqadd_qs8(sqmul_qs8(alpha, sum, fixed_point_position), A); + sum = sqmul_qs8(alpha, sum, fixed_point_position); + sum = sqadd_qs8(sum, const_one); + + return (dec_a < 0) ? 
(sum >> -dec_a) : sqshl_qs8(sum, dec_a); +} + +inline qint8_t slog_qs8(qint8_t a, int fixed_point_position) +{ + // Constants + qint8_t const_one = (1 << fixed_point_position); + qint8_t ln2 = (0x58 >> (7 - fixed_point_position)); + qint8_t A = (0x5C >> (7 - fixed_point_position - 1)); + qint8_t B = -(0x56 >> (7 - fixed_point_position)); + qint8_t C = (0x29 >> (7 - fixed_point_position)); + qint8_t D = -(0x0A >> (7 - fixed_point_position)); + + if((const_one == a) || (a < 0)) + { + return 0; + } + else if(a < const_one) + { + return -slog_qs8(sdiv_qs8(const_one, a, fixed_point_position), fixed_point_position); + } + + // Remove even powers of 2 + qint8_t shift_val = 31 - __builtin_clz(a >> fixed_point_position); + a >>= shift_val; + a = ssub_qs8(a, const_one); + + // Polynomial expansion + auto sum = sqadd_qs8(sqmul_qs8(a, D, fixed_point_position), C); + sum = sqadd_qs8(sqmul_qs8(a, sum, fixed_point_position), B); + sum = sqadd_qs8(sqmul_qs8(a, sum, fixed_point_position), A); + sum = sqmul_qs8(a, sum, fixed_point_position); + + return smul_qs8(sadd_qs8(sum, shift_val << fixed_point_position), ln2, fixed_point_position); +} + +inline float scvt_f32_qs8(qint8_t a, int fixed_point_position) +{ + return static_cast<float>(a) / (1 << fixed_point_position); +} + +inline qint8_t scvt_qs8_f32(float a, int fixed_point_position) +{ + // round_nearest_integer(a * 2^(fixed_point_position)) + return static_cast<qint8_t>(static_cast<float>(a) * (1 << fixed_point_position) + 0.5f); +} + +inline float scvt_f32_qs16(qint16_t a, int fixed_point_position) +{ + return static_cast<float>(a) / (1 << fixed_point_position); +} + +inline qint16_t scvt_qs16_f32(float a, int fixed_point_position) +{ + // round_nearest_integer(a * 2^(fixed_point_position)) + return static_cast<qint16_t>(static_cast<float>(a) * (1 << fixed_point_position) + 0.5f); +} + +inline qint8_t sqmovn_qs16(qint16_t a) +{ + // Saturate the result in case of overflow and cast to qint8_t + return saturate_convert<qint16_t, qint8_t>(a); +} +} diff --git a/arm_compute/core/HOGInfo.h
b/arm_compute/core/HOGInfo.h new file mode 100644 index 0000000000..654629306d --- /dev/null +++ b/arm_compute/core/HOGInfo.h @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_HOGINFO_H__ +#define __ARM_COMPUTE_HOGINFO_H__ + +#include "arm_compute/core/Size2D.h" +#include "arm_compute/core/Types.h" + +#include + +namespace arm_compute +{ +/** Store the HOG's metadata */ +class HOGInfo +{ +public: + /** Default constructor */ + HOGInfo(); + /** Default destructor */ + virtual ~HOGInfo() = default; + /** Allow instances of this class to be copy constructed */ + HOGInfo(const HOGInfo &) = default; + /** Allow instances of this class to be copied */ + HOGInfo &operator=(const HOGInfo &) = default; + /** Allow instances of this class to be move constructed */ + HOGInfo(HOGInfo &&) = default; + /** Allow instances of this class to be moved */ + HOGInfo &operator=(HOGInfo &&) = default; + /** Constructor + * + * @param[in] cell_size Cell size in pixels + * @param[in] block_size Block size in pixels. Must be a multiple of cell_size. + * @param[in] detection_window_size Detection window size in pixels. Must be a multiple of block_size and block_stride. + * @param[in] block_stride Distance in pixels between 2 consecutive blocks along the x and y direction. Must be a multiple of cell size + * @param[in] num_bins Number of histogram bins for each cell + * @param[in] normalization_type (Optional) Normalization type to use for each block + * @param[in] l2_hyst_threshold (Optional) Threshold used for L2HYS_NORM normalization method + * @param[in] phase_type (Optional) Type of @ref PhaseType + */ + HOGInfo(const Size2D &cell_size, const Size2D &block_size, const Size2D &detection_window_size, const Size2D &block_stride, size_t num_bins, + HOGNormType normalization_type = HOGNormType::L2HYS_NORM, float l2_hyst_threshold = 0.2f, PhaseType phase_type = PhaseType::UNSIGNED); + /** Initialize the metadata structure with the given parameters + * + * @param[in] cell_size Cell size in pixels + * @param[in] block_size Block size in pixels. Must be a multiple of cell_size. 
+ * @param[in] detection_window_size Detection window size in pixels. Must be a multiple of block_size and block_stride. + * @param[in] block_stride Distance in pixels between 2 consecutive blocks along the x and y direction. Must be a multiple of cell size + * @param[in] num_bins Number of histogram bins for each cell + * @param[in] normalization_type (Optional) Normalization type to use for each block + * @param[in] l2_hyst_threshold (Optional) Threshold used for L2HYS_NORM normalization method + * @param[in] phase_type (Optional) Type of @ref PhaseType + */ + void init(const Size2D &cell_size, const Size2D &block_size, const Size2D &detection_window_size, const Size2D &block_stride, size_t num_bins, + HOGNormType normalization_type = HOGNormType::L2HYS_NORM, float l2_hyst_threshold = 0.2f, PhaseType phase_type = PhaseType::UNSIGNED); + /** The cell size in pixels + * + * @return The cell size in pixels + */ + const Size2D &cell_size() const; + /** The block size in pixels + * + * @return The block size in pixels + */ + const Size2D &block_size() const; + /** The detection window size in pixels + * + * @return The detection window size in pixels + */ + const Size2D &detection_window_size() const; + /** The block stride in pixels. 
The block stride is the distance between 2 consecutive blocks + * + * @return The block stride in pixels + */ + const Size2D &block_stride() const; + /** The number of histogram bins for each cell + * + * @return The number of histogram bins for each cell + */ + size_t num_bins() const; + /** The normalization type + * + * @return The normalization type + */ + HOGNormType normalization_type() const; + /** Threshold used for L2HYS_NORM normalization type + * + * @return Threshold used for L2HYS_NORM normalization type + */ + float l2_hyst_threshold() const; + /** The type of @ref PhaseType + * + * @return The type of @ref PhaseType + */ + PhaseType phase_type() const; + /** The size of HOG descriptor + * + * @return The size of HOG descriptor + */ + size_t descriptor_size() const; + /** Calculates the number of cells for each block + * + * @return The Size2D data object which stores the number of cells along the x and y directions + */ + Size2D num_cells_per_block() const; + /** Calculates the number of blocks for the given image size + * + * @param[in] image_size The input image size data object + * + * @return The Size2D data object which stores the number of blocks along the x and y directions + */ + Size2D num_blocks_per_image(const Size2D &image_size) const; + +private: + Size2D _cell_size; + Size2D _block_size; + Size2D _detection_window_size; + Size2D _block_stride; + size_t _num_bins; + HOGNormType _normalization_type; + float _l2_hyst_threshold; + PhaseType _phase_type; + size_t _descriptor_size; +}; +} +#endif /*__ARM_COMPUTE_HOGINFO_H__ */ diff --git a/arm_compute/core/Helpers.h b/arm_compute/core/Helpers.h new file mode 100644 index 0000000000..07318eaf7a --- /dev/null +++ b/arm_compute/core/Helpers.h @@ -0,0 +1,507 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_HELPERS_H__ +#define __ARM_COMPUTE_HELPERS_H__ + +#include "arm_compute/core/CL/CLTypes.h" +#include "arm_compute/core/Coordinates.h" +#include "arm_compute/core/IAccessWindow.h" +#include "arm_compute/core/Steps.h" +#include "arm_compute/core/Strides.h" +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Window.h" +#include +#include +#include +#include +#include +#include +#include + +namespace arm_compute +{ +class IKernel; +class ITensor; +class ITensorInfo; + +namespace cpp14 +{ +template +struct _Unique_if +{ + typedef std::unique_ptr _Single_object; +}; + +template +struct _Unique_if +{ + typedef std::unique_ptr _Unknown_bound; +}; + +template +struct _Unique_if +{ + typedef void _Known_bound; +}; + +template +typename _Unique_if::_Single_object +make_unique(Args &&... args) +{ + return std::unique_ptr(new T(std::forward(args)...)); +} + +template +typename _Unique_if::_Unknown_bound +make_unique(size_t n) +{ + typedef typename std::remove_extent::type U; + return std::unique_ptr(new U[n]()); +} + +template +typename _Unique_if::_Known_bound +make_unique(Args &&...) = delete; +} + +template +struct enable_bitwise_ops +{ + static constexpr bool value = false; +}; + +template +typename std::enable_if::value, T>::type operator&(T lhs, T rhs) +{ + using underlying_type = typename std::underlying_type::type; + return static_cast(static_cast(lhs) & static_cast(rhs)); +} + +namespace traits +{ +/** Check if a type T is contained in a tuple Tuple of types */ +template +struct is_contained; + +template +struct is_contained> : std::false_type +{ +}; + +template +struct is_contained> : std::true_type +{ +}; + +template +struct is_contained> : is_contained> +{ +}; +} + +/** Computes bilinear interpolation using the pointer to the top-left pixel and the pixel's distance between + * the real coordinates and the smallest following integer coordinates. 
+ * + * @param[in] pixel_ptr Pointer to the top-left pixel value. Format: Single channel U8 + * @param[in] stride Stride to access the bottom-left and bottom-right pixel values + * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer + * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer + * + * @note dx and dy must be in the range [0, 1.0] + * + * @return The bilinear interpolated pixel value + */ +inline uint8_t delta_bilinear_c1u8(const uint8_t *pixel_ptr, size_t stride, float dx, float dy); + +/** Return the pixel at (x,y) using bilinear interpolation. The image must be single channel U8 + * + * @warning Only works if the iterator was created with an IImage + * + * @param[in] first_pixel_ptr Pointer to the first pixel of a single channel U8 image. + * @param[in] stride Stride in bytes of the image; + * @param[in] x X position of the wanted pixel + * @param[in] y Y position of the wanted pixel + * + * @return The pixel at (x, y) using bilinear interpolation. + */ +inline uint8_t pixel_bilinear_c1u8(const uint8_t *first_pixel_ptr, size_t stride, float x, float y); + +/** Return the pixel at (x,y) using bilinear interpolation by clamping when out of borders. The image must be single channel U8 + * + * @warning Only works if the iterator was created with an IImage + * + * @param[in] first_pixel_ptr Pointer to the first pixel of a single channel U8 image. + * @param[in] stride Stride in bytes of the image + * @param[in] width Width of the image + * @param[in] height Height of the image + * @param[in] x X position of the wanted pixel + * @param[in] y Y position of the wanted pixel + * + * @return The pixel at (x, y) using bilinear interpolation. + */ +inline uint8_t pixel_bilinear_c1u8_clamp(const uint8_t *first_pixel_ptr, size_t stride, size_t width, size_t height, float x, float y); + +/** Return the pixel at (x,y) using area interpolation by clamping when out of borders. 
The image must be single channel U8 + * + * @note The interpolation area depends on the width and height ratio of the input and output images + * @note Currently average of the contributing pixels is calculated + * + * @param[in] first_pixel_ptr Pointer to the first pixel of a single channel U8 image. + * @param[in] stride Stride in bytes of the image + * @param[in] width Width of the image + * @param[in] height Height of the image + * @param[in] wr Width ratio between the input image width and output image width. + * @param[in] hr Height ratio between the input image height and output image height. + * @param[in] x X position of the wanted pixel + * @param[in] y Y position of the wanted pixel + * + * @return The pixel at (x, y) using area interpolation. + */ +inline uint8_t pixel_area_c1u8_clamp(const uint8_t *first_pixel_ptr, size_t stride, size_t width, size_t height, float wr, float hr, int x, int y); + +/** Performs clamping between a lower and upper value. + * + * @param[in] n Value to clamp. + * @param[in] lower Lower threshold. + * @param[in] upper Upper threshold. + * + * @return Clamped value. + */ +template <typename T> +inline T clamp(const T &n, const T &lower, const T &upper) +{ + return std::max(lower, std::min(n, upper)); +} + +/** Base case of for_each. Does nothing. */ +template <typename F> +inline void for_each(F &&) +{ +} + +/** Call the function for each of the arguments + * + * @param[in] func Function to be called + * @param[in] arg Argument passed to the function + * @param[in] args Remaining arguments + */ +template <typename F, typename T, typename... Ts> +inline void for_each(F &&func, T &&arg, Ts &&... args) +{ + func(arg); + for_each(func, args...); +} + +/** Base case of foldl. + * + * @return value. + */ +template <typename F, typename T> +inline T foldl(F &&, const T &value) +{ + return value; +} + +/** Base case of foldl.
+ * + * @return Function evaluation for value1 and value2 + */ +template +inline auto foldl(F &&func, T &&value1, U &&value2) -> decltype(func(value1, value2)) +{ + return func(value1, value2); +} + +/** Fold left. + * + * @param[in] func Function to be called + * @param[in] initial Initial value + * @param[in] value Argument passed to the function + * @param[in] values Remaining arguments + */ +template +inline I foldl(F &&func, I &&initial, T &&value, Vs &&... values) +{ + return foldl(std::forward(func), func(std::forward(initial), std::forward(value)), std::forward(values)...); +} + +/** Iterator updated by @ref execute_window_loop for each window element */ +class Iterator +{ +public: + /** Default constructor to create an empty iterator */ + constexpr Iterator(); + /** Create a container iterator for the metadata and allocation contained in the ITensor + * + * @param[in] tensor The tensor to associate to the iterator. + * @param[in] window The window which will be used to iterate over the tensor. + */ + Iterator(const ITensor *tensor, const Window &window); + + /** Increment the iterator along the specified dimension of the step value associated to the dimension. + * + * @warning It is the caller's responsibility to call increment(dimension+1) when reaching the end of a dimension, the iterator will not check for overflow. + * + * @note When incrementing a dimension 'n' the coordinates of all the dimensions in the range (0,n-1) are reset. For example if you iterate over a 2D image, everytime you change row (dimension 1), the iterator for the width (dimension 0) is reset to its start. + * + * @param[in] dimension Dimension to increment + */ + void increment(size_t dimension); + + /** Return the offset in bytes from the first element to the current position of the iterator + * + * @return The current position of the iterator in bytes relative to the first element. + */ + constexpr int offset() const; + + /** Return a pointer to the current pixel. 
+ * + * @warning Only works if the iterator was created with an ITensor. + * + * @return equivalent to buffer() + offset() + */ + constexpr uint8_t *ptr() const; + + /** Move the iterator back to the beginning of the specified dimension. + * + * @param[in] dimension Dimension to reset + */ + void reset(size_t dimension); + +private: + uint8_t *_ptr; + + class Dimension + { + public: + constexpr Dimension() + : _dim_start(0), _stride(0) + { + } + + int _dim_start; + int _stride; + }; + + std::array<Dimension, Coordinates::num_max_dimensions> _dims; +}; + +/** Iterate through the passed window, automatically adjusting the iterators and calling the lambda_function for each element. + * It passes the x and y positions to the lambda_function for each iteration + * + * @param[in] w Window to iterate through. + * @param[in] lambda_function The function of type void(function)( const Coordinates & id ) to call at each iteration. + * Where id represents the absolute coordinates of the item to process. + * @param[in,out] iterators Tensor iterators which will be updated by this function before calling lambda_function. + */ +template <typename L, typename... Ts> +inline void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators); + +/** Update window and padding size for each of the access patterns. + * + * First the window size is reduced based on all access patterns that are not + * allowed to modify the padding of the underlying tensor. Then the padding of + * the remaining tensors is increased to match the window. + * + * @param[in] win Window that is used by the kernel. + * @param[in] patterns Access patterns used to calculate the final window and padding. + * + * @return True if the window has been changed. Changes to the padding do not + * influence the returned value. + */ +template <typename... Ts> +bool update_window_and_padding(Window &win, Ts &&...
patterns) +{ + bool window_changed = false; + + for_each([&](const IAccessWindow & w) + { + window_changed |= w.update_window_if_needed(win); + }, + patterns...); + + bool padding_changed = false; + + for_each([&](const IAccessWindow & w) + { + padding_changed |= w.update_padding_if_needed(win); + }, + patterns...); + + return window_changed; +} + +/** Calculate the maximum window for a given tensor shape and border setting + * + * @param[in] info Tensor info object defining the shape of the object for which the window is created. + * @param[in] steps (Optional) Number of elements processed for each step. + * @param[in] skip_border (Optional) If true exclude the border region from the window. + * @param[in] border_size (Optional) Border size. + * + * @return The maximum window the kernel can be executed on. + */ +Window calculate_max_window(const ITensorInfo &info, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize()); + +/** Calculate the maximum window used by a horizontal kernel for a given tensor shape and border setting + * + * @param[in] info Tensor info object defining the shape of the object for which the window is created. + * @param[in] steps (Optional) Number of elements processed for each step. + * @param[in] skip_border (Optional) If true exclude the border region from the window. + * @param[in] border_size (Optional) Border size. The border region will be excluded from the window. + * + * @return The maximum window the kernel can be executed on. + */ +Window calculate_max_window_horizontal(const ITensorInfo &info, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize()); + +/** Calculate the maximum window for a given tensor shape and border setting. The window will also includes the border. + * + * @param[in] info Tensor info object defining the shape of the object for which the window is created. + * @param[in] steps (Optional) Number of elements processed for each step. 
+ * @param[in] border_size (Optional) Border size. The border region will be included in the window. + * + * @return The maximum window the kernel can be executed on. + */ +Window calculate_max_enlarged_window(const ITensorInfo &info, const Steps &steps = Steps(), BorderSize border_size = BorderSize()); + +/** Intersect multiple valid regions. + * + * @param[in] regions Valid regions. + * + * @return Intersection of all regions. + */ +template +ValidRegion intersect_valid_regions(Ts &&... regions) +{ + auto intersect = [](const ValidRegion & r1, const ValidRegion & r2) -> ValidRegion + { + ValidRegion region; + + for(size_t d = 0; d < std::min(r1.anchor.num_dimensions(), r2.anchor.num_dimensions()); ++d) + { + region.anchor.set(d, std::max(r1.anchor[d], r2.anchor[d])); + } + + for(size_t d = 0; d < std::min(r1.shape.num_dimensions(), r2.shape.num_dimensions()); ++d) + { + region.shape.set(d, std::min(r1.shape[d], r2.shape[d])); + } + + return region; + }; + + return foldl(intersect, std::forward(regions)...); +} + +/** Create a strides object based on the provided strides and the tensor dimensions. + * + * @param[in] info Tensor info object providing the shape of the tensor for unspecified strides. + * @param[in] stride_x Stride to be used in X dimension (in bytes). + * @param[in] fixed_strides Strides to be used in higher dimensions starting at Y (in bytes). + * + * @return Strides object based on the specified strides. Missing strides are + * calculated based on the tensor shape and the strides of lower dimensions. + */ +template +inline Strides compute_strides(const ITensorInfo &info, T stride_x, Ts &&... fixed_strides) +{ + const TensorShape &shape = info.tensor_shape(); + + // Create strides object + Strides strides(stride_x, fixed_strides...); + + for(size_t i = 1 + sizeof...(Ts); i < info.num_dimensions(); ++i) + { + strides.set(i, shape[i - 1] * strides[i - 1]); + } + + return strides; +} + +/** Create a strides object based on the tensor dimensions. 
+ * + * @param[in] info Tensor info object used to compute the strides. + * + * @return Strides object based on element size and tensor shape. + */ +template +inline Strides compute_strides(const ITensorInfo &info) +{ + return compute_strides(info, info.element_size()); +} + +/* Auto initialize the tensor info (shape, number of channels, data type and fixed point position) if the current assignment is empty. + * + * @param[in,out] info Tensor info used to check and assign. + * @param[in] shape New shape. + * @param[in] num_channels New number of channels. + * @param[in] data_type New data type + * @param[in] fixed_point_position New fixed point position + * + * @return True if the tensor info has been initialized + */ +bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, int fixed_point_position); + +/* Set the shape to the specified value if the current assignment is empty. + * + * @param[in,out] info Tensor info used to check and assign. + * @param[in] shape New shape. + * + * @return True if the shape has been changed. + */ +bool set_shape_if_empty(ITensorInfo &info, const TensorShape &shape); + +/* Set the format, data type and number of channels to the specified value if + * the current data type is unknown. + * + * @param[in,out] info Tensor info used to check and assign. + * @param[in] format New format. + * + * @return True if the format has been changed. + */ +bool set_format_if_unknown(ITensorInfo &info, Format format); + +/* Set the data type and number of channels to the specified value if + * the current data type is unknown. + * + * @param[in,out] info Tensor info used to check and assign. + * @param[in] data_type New data type. + * + * @return True if the data type has been changed. 
+ */ +bool set_data_type_if_unknown(ITensorInfo &info, DataType data_type); + +/* Set the fixed point position to the specified value if + * the current fixed point position is 0 and the data type is QS8 or QS16 + * + * @param[in,out] info Tensor info used to check and assign. + * @param[in] fixed_point_position New fixed point position + * + * @return True if the fixed point position has been changed. + */ +bool set_fixed_point_position_if_zero(ITensorInfo &info, int fixed_point_position); +} // namespace arm_compute + +#include "arm_compute/core/Helpers.inl" +#endif /*__ARM_COMPUTE_HELPERS_H__ */ diff --git a/arm_compute/core/Helpers.inl b/arm_compute/core/Helpers.inl new file mode 100644 index 0000000000..f885810078 --- /dev/null +++ b/arm_compute/core/Helpers.inl @@ -0,0 +1,306 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Validate.h" + +#include +#include + +namespace arm_compute +{ +inline uint8_t delta_bilinear_c1u8(const uint8_t *pixel_ptr, size_t stride, float dx, float dy) +{ + ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr); + + const float dx1 = 1.0f - dx; + const float dy1 = 1.0f - dy; + + const float a00 = *pixel_ptr; + const float a01 = *(pixel_ptr + 1); + const float a10 = *(pixel_ptr + stride); + const float a11 = *(pixel_ptr + stride + 1); + + const float w1 = dx1 * dy1; + const float w2 = dx * dy1; + const float w3 = dx1 * dy; + const float w4 = dx * dy; + + return a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4; +} + +inline uint8_t pixel_bilinear_c1u8(const uint8_t *first_pixel_ptr, size_t stride, float x, float y) +{ + ARM_COMPUTE_ERROR_ON(first_pixel_ptr == nullptr); + + const int32_t xi = x; + const int32_t yi = y; + + const float dx = x - xi; + const float dy = y - yi; + + return delta_bilinear_c1u8(first_pixel_ptr + xi + yi * stride, stride, dx, dy); +} + +inline uint8_t pixel_bilinear_c1u8_clamp(const uint8_t *first_pixel_ptr, size_t stride, size_t width, size_t height, float x, float y) +{ + ARM_COMPUTE_ERROR_ON(first_pixel_ptr == nullptr); + + x = std::max(-1.f, std::min(x, static_cast(width))); + y = std::max(-1.f, std::min(y, static_cast(height))); + + const float xi = std::floor(x); + const float yi = std::floor(y); + + const float dx = x - xi; + const float dy = y - yi; + + return delta_bilinear_c1u8(first_pixel_ptr + static_cast(xi) + static_cast(yi) * stride, stride, dx, dy); +} + +inline uint8_t pixel_area_c1u8_clamp(const uint8_t *first_pixel_ptr, size_t stride, size_t width, size_t height, float wr, float hr, int x, int y) +{ + ARM_COMPUTE_ERROR_ON(first_pixel_ptr == nullptr); + + // Calculate sampling position + float in_x = (x + 0.5f) * wr - 0.5f; + float in_y = (y + 0.5f) * hr - 0.5f; + + // Get bounding box offsets + int x_from = std::floor(x * wr - 0.5f - in_x); + int y_from = 
std::floor(y * hr - 0.5f - in_y); + int x_to = std::ceil((x + 1) * wr - 0.5f - in_x); + int y_to = std::ceil((y + 1) * hr - 0.5f - in_y); + + // Clamp position to borders + in_x = std::max(-1.f, std::min(in_x, static_cast(width))); + in_y = std::max(-1.f, std::min(in_y, static_cast(height))); + + // Clamp bounding box offsets to borders + x_from = ((in_x + x_from) < -1) ? -1 : x_from; + y_from = ((in_y + y_from) < -1) ? -1 : y_from; + x_to = ((in_x + x_to) > width) ? (width - in_x) : x_to; + y_to = ((in_y + y_to) > height) ? (height - in_y) : y_to; + + // Get pixel index + const int xi = std::floor(in_x); + const int yi = std::floor(in_y); + + // Bounding box elements in each dimension + const int x_elements = (x_to - x_from + 1); + const int y_elements = (y_to - y_from + 1); + ARM_COMPUTE_ERROR_ON(x_elements == 0 || y_elements == 0); + + // Sum pixels in area + int sum = 0; + for(int j = yi + y_from, je = yi + y_to; j <= je; ++j) + { + const uint8_t *ptr = first_pixel_ptr + j * stride + xi + x_from; + sum = std::accumulate(ptr, ptr + x_elements, sum); + } + + // Return average + return sum / (x_elements * y_elements); +} + +template +struct IncrementIterators +{ + template + static void unroll(T &&it, Ts &&... iterators) + { + it.increment(dimension); + IncrementIterators::unroll(std::forward(iterators)...); + } + + template + static void unroll(T &&it) + { + it.increment(dimension); + // End of recursion + } + + static void unroll() + { + // End of recursion + } +}; + +template +struct ForEachDimension +{ + template + static void unroll(const Window &w, Coordinates &id, L &&lambda_function, Ts &&... 
iterators) + { + const auto &d = w[dim - 1]; + + for(auto v = d.start(); v < d.end(); v += d.step(), IncrementIterators < dim - 1 >::unroll(iterators...)) + { + id.set(dim - 1, v); + ForEachDimension < dim - 1 >::unroll(w, id, lambda_function, iterators...); + } + } +}; + +template <> +struct ForEachDimension<0> +{ + template + static void unroll(const Window &w, Coordinates &id, L &&lambda_function, Ts &&... iterators) + { + lambda_function(id); + } +}; + +template +inline void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators) +{ + w.validate(); + + Coordinates id; + ForEachDimension::unroll(w, id, std::forward(lambda_function), std::forward(iterators)...); +} + +inline constexpr Iterator::Iterator() + : _ptr(nullptr), _dims() +{ +} + +inline Iterator::Iterator(const ITensor *tensor, const Window &win) + : Iterator() +{ + ARM_COMPUTE_ERROR_ON(tensor == nullptr); + const ITensorInfo *info = tensor->info(); + ARM_COMPUTE_ERROR_ON(info == nullptr); + const Strides &strides = info->strides_in_bytes(); + + _ptr = tensor->buffer() + info->offset_first_element_in_bytes(); + + //Initialize the stride for each dimension and calculate the position of the first element of the iteration: + for(unsigned int n = 0; n < info->num_dimensions(); ++n) + { + _dims[n]._stride = win[n].step() * strides[n]; + std::get<0>(_dims)._dim_start += strides[n] * win[n].start(); + } + + //Copy the starting point to all the dimensions: + for(unsigned int n = 1; n < Coordinates::num_max_dimensions; ++n) + { + _dims[n]._dim_start = std::get<0>(_dims)._dim_start; + } + + ARM_COMPUTE_ERROR_ON_WINDOW_DIMENSIONS_GTE(win, info->num_dimensions()); +} + +inline void Iterator::increment(const size_t dimension) +{ + ARM_COMPUTE_ERROR_ON(dimension >= Coordinates::num_max_dimensions); + + _dims[dimension]._dim_start += _dims[dimension]._stride; + + for(unsigned int n = 0; n < dimension; ++n) + { + _dims[n]._dim_start = _dims[dimension]._dim_start; + } +} + +inline constexpr int 
Iterator::offset() const +{ + return _dims.at(0)._dim_start; +} + +inline constexpr uint8_t *Iterator::ptr() const +{ + return _ptr + _dims.at(0)._dim_start; +} + +inline void Iterator::reset(const size_t dimension) +{ + ARM_COMPUTE_ERROR_ON(dimension >= Coordinates::num_max_dimensions - 1); + + _dims[dimension]._dim_start = _dims[dimension + 1]._dim_start; + + for(unsigned int n = 0; n < dimension; ++n) + { + _dims[n]._dim_start = _dims[dimension]._dim_start; + } +} + +inline bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, int fixed_point_position) +{ + if(info.tensor_shape().total_size() == 0) + { + info.set_data_type(data_type); + info.set_tensor_shape(shape); + info.set_num_channels(num_channels); + info.set_fixed_point_position(fixed_point_position); + return true; + } + + return false; +} + +inline bool set_shape_if_empty(ITensorInfo &info, const TensorShape &shape) +{ + if(info.tensor_shape().total_size() == 0) + { + info.set_tensor_shape(shape); + return true; + } + + return false; +} + +inline bool set_format_if_unknown(ITensorInfo &info, Format format) +{ + if(info.data_type() == DataType::UNKNOWN) + { + info.set_format(format); + return true; + } + + return false; +} + +inline bool set_data_type_if_unknown(ITensorInfo &info, DataType data_type) +{ + if(info.data_type() == DataType::UNKNOWN) + { + info.set_data_type(data_type); + return true; + } + + return false; +} + +inline bool set_fixed_point_position_if_zero(ITensorInfo &info, int fixed_point_position) +{ + if(info.fixed_point_position() == 0 && (info.data_type() == DataType::QS8 || info.data_type() == DataType::QS16)) + { + info.set_fixed_point_position(fixed_point_position); + return true; + } + + return false; +} +} // namespace arm_compute diff --git a/arm_compute/core/IAccessWindow.h b/arm_compute/core/IAccessWindow.h new file mode 100644 index 0000000000..cf7490d53e --- /dev/null +++ b/arm_compute/core/IAccessWindow.h @@ -0,0 +1,225 @@ 
+/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_IACCESS_WINDOW_H__ +#define __ARM_COMPUTE_IACCESS_WINDOW_H__ + +#include "arm_compute/core/Coordinates.h" +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" + +#include + +namespace arm_compute +{ +class Window; +class ITensorInfo; + +/** Decrease @p required in steps of @p step until it's less than @p available. + * + * @param[in] required Number of required bytes. + * @param[in] available Number of available bytes. + * @param[in] step Step size used to decrease required bytes. 
+ * + * @return Largest value smaller than @p available that is a multiple of @p step + * + **/ +inline int adjust_down(int required, int available, int step) +{ + ARM_COMPUTE_ERROR_ON(step <= 0); + + return required - step * ((required - available + step - 1) / step); +} + +/** Increase @p required in steps of @p step until it's greater than @p available. + * + * @param[in] required Number of required bytes. + * @param[in] available Number of available bytes. + * @param[in] step Step size used to increase required bytes. + * + * @return Largest value smaller than @p available that is a multiple of @p step + * + **/ +inline int adjust_up(int required, int available, int step) +{ + ARM_COMPUTE_ERROR_ON(step <= 0); + + return required + step * ((available - required + step - 1) / step); +} + +/** Interface describing methods to update access window and padding based on kernel parameters. */ +class IAccessWindow +{ +public: + virtual ~IAccessWindow() = default; + /** Shrink the window if padding is not large enough. + * + * @param[in] window Window used by the kernel. + * + * @return True if the window has been changed. + */ + virtual bool update_window_if_needed(Window &window) const = 0; + /** Increase the padding to be large enough for the window. + * + * @param[in] window Window used by the kernel. + * + * @return True if the padding has been changed. + */ + virtual bool update_padding_if_needed(const Window &window) const = 0; + /** Compute the valid region based on access pattern and valid region of the inputs. + * + * @note This method assumes that there is no border. + * + * @param[in] window Execution window of the kernel. + * @param[in] input_valid_region Combined valid region of all inputs. + * @param[in] border_undefined Undefined borders are excluded from the valid region. + * @param[in] border_size Size of the border around the XY-plane of the tensor. 
+ */ + virtual ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) const = 0; +}; + +/** Implementation of a rectangular access pattern. */ +class AccessWindowRectangle : public IAccessWindow +{ +public: + /** Constructor for a rectangular access pattern. + * + * @note Width and height have to be non-negative. + * + * @param[in,out] info Tensor info of the accessed kernel. + * @param[in] x Offset of the access in X direction. + * @param[in] y Offset of the access in Y direction. + * @param[in] width Number of elements that are accessed in X direction. + * @param[in] height Number of elements that are accessed in Y direction. + */ + AccessWindowRectangle(ITensorInfo *info, int x, int y, int width, int height) + : AccessWindowRectangle(info, x, y, width, height, 1.f, 1.f) + { + } + + /** Constructor for a rectangular access pattern. + * + * @note Width, height and scale have to be non-negative. + * + * @param[in,out] info Tensor info of the accessed kernel. + * @param[in] x Offset of the access in X direction. + * @param[in] y Offset of the access in Y direction. + * @param[in] width Number of elements that are accessed in X direction. + * @param[in] height Number of elements that are accessed in Y direction. 
+ * @param[in] scale_x Ratio along the X direction between the window used by the execute_window_loop and the rectangular access pattern defined + * @param[in] scale_y Ratio along the Y direction between the window used by the execute_window_loop and the rectangular access pattern defined + */ + AccessWindowRectangle(ITensorInfo *info, int x, int y, int width, int height, float scale_x, float scale_y) + : _info(info), _x(x), _y(y), _width(width), _height(height), _scale_x(scale_x), _scale_y(scale_y) + { + ARM_COMPUTE_ERROR_ON(width < 0); + ARM_COMPUTE_ERROR_ON(height < 0); + ARM_COMPUTE_ERROR_ON(scale_x < 0); + ARM_COMPUTE_ERROR_ON(scale_y < 0); + } + + AccessWindowRectangle(const AccessWindowRectangle &) = delete; + AccessWindowRectangle &operator=(const AccessWindowRectangle &) = delete; + AccessWindowRectangle(AccessWindowRectangle &&) = default; + AccessWindowRectangle &operator=(AccessWindowRectangle &&) = default; + ~AccessWindowRectangle() = default; + + /** Set the valid region based on access pattern, valid region of the inputs and border mode. + * + * @param[in] window Execution window of the kernel. + * @param[in] input_valid_region Combined valid region of all inputs. + * @param[in] border_undefined (Optional) Undefined borders are excluded from the valid region. + * @param[in] border_size (Optional) Size of the border around the XY-plane of the tensor. + */ + void set_valid_region(const Window &window, const ValidRegion &input_valid_region, bool border_undefined = false, const BorderSize &border_size = BorderSize(0)); + + /** Compute the valid region based on access pattern, valid region of the inputs and border mode. + * + * @note This method assumes that there is no border. + * + * @param[in] window Execution window of the kernel. + * @param[in] input_valid_region Combined valid region of all inputs. 
+ */ + ValidRegion compute_valid_region(const Window &window, const ValidRegion &input_valid_region) const; + + // Inherited methods overridden: + + /** @note This method assumes that all elements written by the kernel are valid. */ + ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) const override; + + bool update_window_if_needed(Window &window) const override; + bool update_padding_if_needed(const Window &window) const override; + +protected: + ITensorInfo *_info; + int _x; + int _y; + int _width; + int _height; + float _scale_x; + float _scale_y; +}; + +/** Implementation of a column access pattern. */ +class AccessWindowVertical : public AccessWindowRectangle +{ +public: + /** Constructor for a column access pattern. + * + * @note Height has to be non-negative. + * + * @param[in,out] info Tensor info of the accessed kernel. + * @param[in] y Offset of the access in Y direction. + * @param[in] height Number of elements that are accessed in Y direction. + * @param[in] scale_y Ratio along the Y direction between the window used by the execute_window_loop and the rectangular access pattern defined + */ + AccessWindowVertical(ITensorInfo *info, int y, int height, float scale_y = 1.f) + : AccessWindowRectangle(info, 0, y, 1, height, 1.f, scale_y) + { + ARM_COMPUTE_ERROR_ON(height < 0); + ARM_COMPUTE_ERROR_ON(scale_y < 0); + } +}; + +/** Implementation of a row access pattern. */ +class AccessWindowHorizontal : public AccessWindowRectangle +{ +public: + /** Constructor for a row access pattern. + * + * @note Width has to be non-negative. + * + * @param[in,out] info Tensor info of the accessed kernel. + * @param[in] x Offset of the access in X direction. + * @param[in] width Number of elements that are accessed in X direction. 
+ * @param[in] scale_x Ratio along the X direction between the window used by the execute_window_loop and the rectangular access pattern defined + */ + AccessWindowHorizontal(ITensorInfo *info, int x, int width, float scale_x = 1.f) + : AccessWindowRectangle(info, x, 0, width, 1, scale_x, 1.f) + { + ARM_COMPUTE_ERROR_ON(width < 0); + ARM_COMPUTE_ERROR_ON(scale_x < 0); + } +}; +} // namespace arm_compute +#endif /*__ARM_COMPUTE_IACCESS_WINDOW_H__*/ diff --git a/arm_compute/core/IArray.h b/arm_compute/core/IArray.h new file mode 100644 index 0000000000..2ed56100cf --- /dev/null +++ b/arm_compute/core/IArray.h @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_IARRAY_H__ +#define __ARM_COMPUTE_IARRAY_H__ + +#include "arm_compute/core/Error.h" +#include +#include + +namespace arm_compute +{ +class KeyPoint; +class Coordinates2D; +class DetectionWindow; +class Size2D; + +/** Array of type T */ +template +class IArray +{ +public: + /** Default constructor */ + IArray() + : _num_values(0), _max_size(0) {}; + /** Constructor: initializes an array which can contain up to max_num_points values + * + * @param[in] max_num_values Maximum number of values the array will be able to stored + */ + IArray(size_t max_num_values) + : _num_values(0), _max_size(max_num_values) + { + } + /** Maximum number of values which can be stored in this array + * + * @return Maximum number of values + */ + size_t max_num_values() const + { + return _max_size; + } + /** Default virtual destructor */ + virtual ~IArray() = default; + /** Number of values currently stored in the array + * + * @return Number of values currently stored in the array or max_num_values + 1 if the array is overflowed. + */ + size_t num_values() const + { + return _num_values; + } + /** Append the passed argument to the end of the array if there is room. + * + * @param[in] val Value to add to the array. + * + * @return True if the point was successfully added to the array. False if the array is full and the point couldn't be added. + */ + bool push_back(const T &val) + { + ARM_COMPUTE_ERROR_ON(0 == _max_size); + if(_num_values >= max_num_values()) + { + _num_values = max_num_values() + 1; + return false; + } + at(_num_values) = val; + _num_values++; + return true; + } + /** Clear all the points from the array. */ + void clear() + { + _num_values = 0; + } + /** Did we lose some values because the array is too small? + * + * @return True if we tried to add a value using push_back() but there wasn't any room left to store it. + * False if all the values were successfully added to the array. 
+ */ + bool overflow() const + { + return _num_values > max_num_values(); + } + /** Pointer to the first element of the array + * + * Other elements of the array can be accessed using buffer()[idx] for 0 <= idx < num_poins(). + * + * @return A pointer to the first element of the array + */ + virtual T *buffer() const = 0; + /** Reference to the element of the array located at the given index + * + * @param[in] index Index of the element + * + * @return A reference to the element of the array located at the given index. + */ + virtual T &at(size_t index) const + { + ARM_COMPUTE_ERROR_ON(buffer() == nullptr); + ARM_COMPUTE_ERROR_ON(index >= max_num_values()); + return buffer()[index]; + } + /** Resizes the array to contain "num" elements. If "num" is smaller than the maximum array size, the content is reduced to its first "num" elements, + * "num" elements can't be bigger than the maximum number of values which can be stored in this array. + * + * @param[in] num The new array size in number of elements + */ + void resize(size_t num) + { + ARM_COMPUTE_ERROR_ON(num > max_num_values()); + _num_values = num; + }; + +private: + size_t _num_values; + size_t _max_size; +}; +using IKeyPointArray = IArray; +using ICoordinates2DArray = IArray; +using IDetectionWindowArray = IArray; +using ISize2DArray = IArray; +using IUInt8Array = IArray; +using IUInt16Array = IArray; +using IUInt32Array = IArray; +using IInt16Array = IArray; +using IInt32Array = IArray; +using IFloatArray = IArray; +} +#endif /* __ARM_COMPUTE_IARRAY_H__ */ diff --git a/arm_compute/core/IDistribution.h b/arm_compute/core/IDistribution.h new file mode 100644 index 0000000000..b57543a3bf --- /dev/null +++ b/arm_compute/core/IDistribution.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_IDISTRIBUTION_H__ +#define __ARM_COMPUTE_IDISTRIBUTION_H__ + +#include +#include + +namespace arm_compute +{ +/** Interface for distribution objects */ +class IDistribution +{ +public: + /** Default virtual destructor */ + virtual ~IDistribution() = default; + /** Returns the dimensions of the distribution. + * + * @note This is fixed to 1-dimensional distribution for now. + * @return Dimensions of the distribution. + */ + virtual size_t dimensions() const = 0; + /** Returns the total size in bytes of the distribution. + * + * @return Total size of the distribution in bytes. + */ + virtual size_t size() const = 0; + /** Returns a pointer to the start of the distribution. + * Other elements of the array can be accessed using buffer()[idx] for 0 <= idx < num_bins() + * + * @return Pointer to the start of the distribution. 
+ */ + virtual uint32_t *buffer() const = 0; + /** Clears the distribution by setting every element to zero. */ + void clear() const; +}; +} +#endif /* __ARM_COMPUTE_IDISTRIBUTION_H__ */ diff --git a/arm_compute/core/IDistribution1D.h b/arm_compute/core/IDistribution1D.h new file mode 100644 index 0000000000..ca8bfc0a7d --- /dev/null +++ b/arm_compute/core/IDistribution1D.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_IDISTRIBUTION1D_H__ +#define __ARM_COMPUTE_IDISTRIBUTION1D_H__ + +#include "arm_compute/core/IDistribution.h" + +#include +#include + +namespace arm_compute +{ +/** 1D Distribution interface */ +class IDistribution1D : public IDistribution +{ +public: + /** Constructor: Creates a 1D Distribution of a consecutive interval [offset, offset + range - 1] + * defined by a start offset and valid range, divided equally into num_bins parts. + * + * @param[in] num_bins The number of bins the distribution is divided in. + * @param[in] offset The start of the values to use. + * @param[in] range The total number of the consecutive values of the distribution interval. + */ + IDistribution1D(size_t num_bins, int32_t offset, uint32_t range); + /** Returns the number of bins that the distribution has. + * + * @return Number of bins of the distribution. + */ + size_t num_bins() const; + /** Returns the offset of the distribution. + * + * @return Offset of the distribution. + */ + int32_t offset() const; + /** Returns the range of the distribution. + * + * @return Range of the distribution. + */ + uint32_t range() const; + /** Returns the window of the distribution, which is the range divided by the number of bins. + * + * @note If range is not divided by the number of bins then it is invalid. + * + * @return Window of the distribution. + */ + uint32_t window() const; + /** Sets the range of the distribution. + * + * @param[in] range New range of the distribution to be set. + */ + void set_range(uint32_t range); + + // Inherited methods overridden: + size_t size() const override; + size_t dimensions() const override; + +private: + size_t _num_bins; /**< Number of bins. */ + int32_t _offset; /**< Offset, which indicate the start of the usable values. 
*/ + uint32_t _range; /**< The total number of consecutive values of the distribution interval */ +}; +} +#endif /* __ARM_COMPUTE_IDISTRIBUTION1D_H__ */ diff --git a/arm_compute/core/IHOG.h b/arm_compute/core/IHOG.h new file mode 100644 index 0000000000..8bf713ae82 --- /dev/null +++ b/arm_compute/core/IHOG.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_IHOG_H__ +#define __ARM_COMPUTE_IHOG_H__ + +#include "arm_compute/core/Types.h" + +#include + +namespace arm_compute +{ +class HOGInfo; +/** Interface for HOG data-object */ +class IHOG +{ +public: + /** Interface to be implemented by the child class to return the HOG's metadata + * + * @return A pointer to the HOG's metadata. 
+ */ + virtual const HOGInfo *info() const = 0; + /** Default virtual destructor */ + virtual ~IHOG() = default; + /** Pointer to the first element of the array which stores the linear SVM coefficients of HOG descriptor + * + * @note Other elements of the array can be accessed using descriptor()[idx] for idx=[0, descriptor_size() - 1] + * + * @return A pointer to the first element of the array which stores the linear SVM coefficients of HOG descriptor + */ + virtual float *descriptor() const = 0; +}; +} +#endif /* __ARM_COMPUTE_IHOG_H__ */ diff --git a/arm_compute/core/IKernel.h b/arm_compute/core/IKernel.h new file mode 100644 index 0000000000..4f3812b6da --- /dev/null +++ b/arm_compute/core/IKernel.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_IKERNEL_H__ +#define __ARM_COMPUTE_IKERNEL_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Window.h" + +namespace arm_compute +{ +/** Common information for all the kernels */ +class IKernel +{ +public: + /** Constructor */ + IKernel(); + /** Destructor */ + virtual ~IKernel() = default; + /** Indicates whether or not the kernel is parallelisable + * + * If the kernel is parallelisable then the window returned by window() can be split into sub-windows + * which can then be run in parallel. + * + * If the kernel is not parallelisable then only the window returned by window() can be passed to run() + * + * @return True if the kernel is parallelisable + */ + virtual bool is_parallelisable() const; + /** The size of the border for that kernel + * + * @return The width in number of elements of the border. + */ + virtual BorderSize border_size() const; + /** The maximum window the kernel can be executed on + * + * @return The maximum window the kernel can be executed on. + */ + const Window &window() const; + +protected: + /** Configure the kernel's window + * + * @param[in] window The maximum window which will be returned by window() + */ + void configure(const Window &window); + +private: + Window _window; +}; +} +#endif /*__ARM_COMPUTE_IKERNEL_H__ */ diff --git a/arm_compute/core/ILut.h b/arm_compute/core/ILut.h new file mode 100644 index 0000000000..5223aea67a --- /dev/null +++ b/arm_compute/core/ILut.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_ILUT_H__ +#define __ARM_COMPUTE_ILUT_H__ + +#include "arm_compute/core/Types.h" + +#include + +namespace arm_compute +{ +/** Lookup Table object interface. */ +class ILut +{ +public: + /** Default virtual destructor */ + virtual ~ILut() = default; + /** Returns the total number of elements in the LUT. + * + * @return Total number of elements. + */ + virtual size_t num_elements() const = 0; + /** Indicates the offset that needs to be applied to the raw index before performing a lookup in the LUT. + * + * @return The normalization offset. + */ + virtual uint32_t index_offset() const = 0; + /** Returns the total size in bytes of the LUT. + * + * @return Total size of the LUT in bytes. + */ + virtual size_t size_in_bytes() const = 0; + /** Returns the type of the LUT. + * + * @return The type of the LUT. 
+ */ + virtual DataType type() const = 0; + /** Returns a pointer to the start of the LUT. + * Other elements of the LUT can be accessed using buffer()[idx] for 0 <= idx < num_elements(). + * + * @return Pointer to the start of the lut. + */ + virtual uint8_t *buffer() const = 0; + /** Clears the LUT by setting every element to zero. */ + virtual void clear() = 0; +}; +} +#endif /* __ARM_COMPUTE_ILUT_H__ */ diff --git a/arm_compute/core/IMultiHOG.h b/arm_compute/core/IMultiHOG.h new file mode 100644 index 0000000000..e91da75398 --- /dev/null +++ b/arm_compute/core/IMultiHOG.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_IMULTIHOG_H__ +#define __ARM_COMPUTE_IMULTIHOG_H__ + +#include "arm_compute/core/IHOG.h" + +#include + +namespace arm_compute +{ +/** Interface for storing multiple HOG data-objects */ +class IMultiHOG +{ +public: + /** Default destructor */ + virtual ~IMultiHOG() = default; + /** The number of HOG models stored + * + * @return The number of HOG models stored + */ + virtual size_t num_models() const = 0; + /** Return a pointer to the requested HOG model + * + * @param[in] index The index of the wanted HOG model. + * + * @return A pointer pointed to the HOG model + */ + virtual IHOG *model(size_t index) = 0; + /** Return a const pointer to the requested HOG model + * + * @param[in] index The index of the wanted HOG model. + * + * @return A const pointer pointed to the HOG model + */ + virtual const IHOG *model(size_t index) const = 0; +}; +} + +#endif /* __ARM_COMPUTE_IMULTIHOG_H__ */ diff --git a/arm_compute/core/IMultiImage.h b/arm_compute/core/IMultiImage.h new file mode 100644 index 0000000000..6ed3c785ca --- /dev/null +++ b/arm_compute/core/IMultiImage.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_IMULTIIMAGE_H__ +#define __ARM_COMPUTE_IMULTIIMAGE_H__ + +namespace arm_compute +{ +class ITensor; +using IImage = ITensor; +class MultiImageInfo; + +/** Interface for multi-planar images */ +class IMultiImage +{ +public: + /** Destructor */ + virtual ~IMultiImage() = default; + /** Interface to be implemented by the child class to return the multi-planar image's metadata + * + * @return A pointer to the image's metadata. + */ + virtual const MultiImageInfo *info() const = 0; + /** Return a pointer to the requested plane of the image. + * + * @param[in] index The index of the wanted plane. + * + * @return A pointer to the plane + */ + virtual IImage *plane(unsigned int index) = 0; + /** Return a constant pointer to the requested plane of the image. + * + * @param[in] index The index of the wanted plane. + * + * @return A constant pointer to the plane + */ + virtual const IImage *plane(unsigned int index) const = 0; +}; +} +#endif /*__ARM_COMPUTE_IMULTIIMAGE_H__ */ diff --git a/arm_compute/core/IPyramid.h b/arm_compute/core/IPyramid.h new file mode 100644 index 0000000000..e5d7011cf9 --- /dev/null +++ b/arm_compute/core/IPyramid.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_IPYRAMID_H__ +#define __ARM_COMPUTE_IPYRAMID_H__ + +#include "arm_compute/core/ITensor.h" +#include "arm_compute/core/PyramidInfo.h" +#include "arm_compute/core/Types.h" + +#include + +namespace arm_compute +{ +/** Interface for pyramid data-object */ +class IPyramid +{ +public: + /** Default virtual destructor */ + virtual ~IPyramid() = default; + /** Interface to be implemented by the child class to return the Pyramid's metadata + * + * @return A pointer to the Pyramid's metadata. + */ + virtual const PyramidInfo *info() const = 0; + /** Retrieves a level of the pyramid as a ITensor pointer + * + * @param[in] index The index of the level, such that index is less than levels. 
+ * + * @return An ITensor pointer + */ + virtual ITensor *get_pyramid_level(size_t index) const = 0; +}; +} + +#endif /* __ARM_COMPUTE_IPYRAMID_H__ */ diff --git a/arm_compute/core/ITensor.h b/arm_compute/core/ITensor.h new file mode 100644 index 0000000000..202b50a0d8 --- /dev/null +++ b/arm_compute/core/ITensor.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_ITENSOR_H__ +#define __ARM_COMPUTE_ITENSOR_H__ + +#include "arm_compute/core/TensorInfo.h" + +#include + +namespace arm_compute +{ +class Coordinates; + +/** Interface for NEON tensor */ +class ITensor +{ +public: + /** Interface to be implemented by the child class to return the tensor's metadata + * + * @return A pointer to the tensor's metadata. 
+ */ + virtual ITensorInfo *info() const = 0; + /** Interface to be implemented by the child class to return the tensor's metadata + * + * @return A pointer to the tensor's metadata. + */ + virtual ITensorInfo *info() = 0; + /** Default virtual destructor */ + virtual ~ITensor() = default; + /** Interface to be implemented by the child class to return a pointer to CPU memory + * + * @return A CPU pointer to the beginning of the image's allocation. + */ + virtual uint8_t *buffer() const = 0; + + /** Return a pointer to the element at the passed coordinates + * + * @param[in] id Coordinates of the element + * + * @return Pointer to the requested element + */ + inline uint8_t *ptr_to_element(const Coordinates &id) const + { + return buffer() + info()->offset_element_in_bytes(id); + } + + /** Copy the content of another tensor. + * + * @note The number of dimensions of the source tensor must be less or equal to those of the destination tensor. + * + * @note All dimensions of the destination tensor must be greater or equal to the source tensor ones. + * + * @note num_channels() and element_size() of both tensors must match. + * + * @param[in] src Source tensor to copy from. + */ + void copy_from(const ITensor &src); + + /** Print a tensor to a given stream using user defined formatting information + * + * @param s Output stream + * @param io_fmt Format information + */ + void print(std::ostream &s, IOFormatInfo io_fmt = IOFormatInfo()) const; +}; + +using IImage = ITensor; +} +#endif /*__ARM_COMPUTE_ITENSOR_H__ */ diff --git a/arm_compute/core/ITensorInfo.h b/arm_compute/core/ITensorInfo.h new file mode 100644 index 0000000000..bb3ac6e35e --- /dev/null +++ b/arm_compute/core/ITensorInfo.h @@ -0,0 +1,195 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_ITENSORINFO_H__ +#define __ARM_COMPUTE_ITENSORINFO_H__ + +#include "arm_compute/core/Coordinates.h" +#include "arm_compute/core/Strides.h" +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Utils.h" + +#include + +namespace arm_compute +{ +/** Store the tensor's metadata */ +class ITensorInfo +{ +public: + /** Default virtual destructor */ + virtual ~ITensorInfo() = default; + /** Set the data type to the specified value. + * + * @warning This resets the format to UNKNOWN. + * + * @param[in] data_type The new data type. + */ + virtual void set_data_type(DataType data_type) = 0; + /** Set the number of channels to the specified value. + * + * @warning This resets the format to UNKNOWN. + * + * @param[in] num_channels New number of channels. 
+ */ + virtual void set_num_channels(int num_channels) = 0; + /** Set the format of an already initialized tensor. + * + * @note If the data type has already been configured (i.e. not UNKNOWN) it + * must match the new format. If data type hasn't been configured it will + * be based on the format. + * + * @param[in] format Single-plane format of the tensor. + */ + virtual void set_format(Format format) = 0; + /** Set the shape of an already initialized tensor. + * + * @warning Changing the shape requires to recompute the strides and is + * therefore only possible if the tensor hasn't been allocated yet. + * + * @param[in] shape New tensor shape. + */ + virtual void set_tensor_shape(TensorShape shape) = 0; + /** Set the fixed point position to the specified value + * + * @warning The fixed point position must be set once the data type has been configured + * + * @param[in] fixed_point_position The new fixed point position + */ + virtual void set_fixed_point_position(int fixed_point_position) = 0; + /** Update the offset to the first element and the strides to automatically computed values. + * + * @note The padding used by this method is really conservative so that the tensor can be used for most functions. + * + * @return True if the strides or the offset to the first element have changed. + */ + virtual bool auto_padding() = 0; + /** Update the offset to the first element, the strides and the total size. + * + * @note This function can only increase the offset, strides and total size. + * + * @param[in] padding Padding around the XY plane in number of elements. + * + * @return True if the strides, offset and total size have changed. 
+ */ + virtual bool extend_padding(const PaddingSize &padding) = 0; + /** Return the size of the requested dimension + * + * @param[in] index Index of the dimension + * + * @return Dimension of the requested dimension + */ + virtual size_t dimension(size_t index) const = 0; + /** The strides in bytes for accessing each dimension of the tensor + * + * @return Strides in bytes for each tensor dimension + */ + virtual const Strides &strides_in_bytes() const = 0; + /** The offset from the beginning of the memory allocation to the first element of the tensor. + * This can be used to access efficiently elements in a 2D tensor + * + * @return The offset in bytes to access the first element of the tensor. + */ + virtual size_t offset_first_element_in_bytes() const = 0; + /** The offset in bytes from the beginning of the memory allocation to access the element at position (x, y, z ...) + * + * @param[in] pos Vector with the coordinates of the element to access. + * The size of this vector must be equal to the number of dimensions of the tensor + * + * @return Offset in bytes from the beginning of the memory allocation to access the element (x, y, z, ...) 
+ */ + virtual size_t offset_element_in_bytes(const Coordinates &pos) const = 0; + /** Fixed point position used when the tensor data type is QS8 or QS16 + * + * @return The fixed point position that expresses the number of bits for the fractional part of the number + */ + virtual int fixed_point_position() const = 0; + /** Element size in bytes calculated as data_size() * num_channels() + * + * @return The size of one element in bytes + */ + virtual size_t element_size() const = 0; + /** The number of dimensions of the tensor (rank) + * + * @return The number of dimensions of the tensor (rank) + */ + virtual size_t num_dimensions() const = 0; + /** The number of channels for each tensor element + * + * @return The number of channels for each tensor element + */ + virtual size_t num_channels() const = 0; + /** Size for each dimension of the tensor + * + * @return A vector with the size for each dimension of the tensor + */ + virtual const TensorShape &tensor_shape() const = 0; + /** Data type used for each element of the tensor + * + * @return Tensor data type + */ + virtual DataType data_type() const = 0; + /** Colour format of the image + * + * @return Colour format of the image + */ + virtual Format format() const = 0; + /** Returns the total size of the tensor in bytes. + * + * @return Total size of the tensor in bytes. + */ + virtual size_t total_size() const = 0; + /** Padding of tensor. + * + * @return Padding. + */ + virtual PaddingSize padding() const = 0; + /** Checks if the tensor has been allocated with padding or not. + * + * @return True if padding is allocated in the tensor, otherwise false. + */ + virtual bool has_padding() const = 0; + /** Flag indicating whether the size of the tensor can be changed. + * + * @return True if the tensor size can be changed. + */ + virtual bool is_resizable() const = 0; + /** Set the flag whether the tensor size can be changed. + * + * @param[in] is_resizable Flag that marks the tensor if it can be changed or not. 
+ */ + virtual void set_is_resizable(bool is_resizable) = 0; + /** Valid region of the tensor. All elements in the valid region have defined values, i.e. are not undefined. + * + * @return The valid region. + */ + virtual ValidRegion valid_region() const = 0; + /** Set the valid region of the tensor. + * + * @param[in] valid_region Valid region to set. + */ + virtual void set_valid_region(ValidRegion valid_region) = 0; +}; +} +#endif /*__ARM_COMPUTE_ITENSORINFO_H__ */ diff --git a/arm_compute/core/MultiImageInfo.h b/arm_compute/core/MultiImageInfo.h new file mode 100644 index 0000000000..6d76953845 --- /dev/null +++ b/arm_compute/core/MultiImageInfo.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_MULTIIMAGEINFO_H__ +#define __ARM_COMPUTE_MULTIIMAGEINFO_H__ + +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +/** Store the multi-planar image's metadata */ +class MultiImageInfo +{ +public: + /** Constructor */ + MultiImageInfo(); + /** Initialize the metadata structure with the given parameters + * + * @param[in] width Width of the image (in number of pixels) + * @param[in] height Height of the image (in number of pixels) + * @param[in] format Colour format of the image. + */ + void init(unsigned int width, unsigned int height, Format format); + /** Colour format of the image + * + * @return Colour format of the image + */ + Format format() const; + /** Width in pixels + * + * @return The width in pixels + */ + unsigned int width() const; + /** Height in pixels + * + * @return The height in pixels + */ + unsigned int height() const; + +protected: + unsigned int _width; + unsigned int _height; + Format _format; +}; +} +#endif /*__ARM_COMPUTE_MULTIIMAGEINFO_H__ */ diff --git a/arm_compute/core/NEON/INEKernel.h b/arm_compute/core/NEON/INEKernel.h new file mode 100644 index 0000000000..3ac8164a51 --- /dev/null +++ b/arm_compute/core/NEON/INEKernel.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_INEKERNEL_H__ +#define __ARM_COMPUTE_INEKERNEL_H__ + +#include "arm_compute/core/CPP/ICPPKernel.h" + +namespace arm_compute +{ +using INEKernel = ICPPKernel; +} +#endif /*__ARM_COMPUTE_INEKERNEL_H__ */ diff --git a/arm_compute/core/NEON/INESimpleKernel.h b/arm_compute/core/NEON/INESimpleKernel.h new file mode 100644 index 0000000000..ca25532ef1 --- /dev/null +++ b/arm_compute/core/NEON/INESimpleKernel.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_INESIMPLEKERNEL_H__ +#define __ARM_COMPUTE_INESIMPLEKERNEL_H__ + +#include "arm_compute/core/CPP/ICPPSimpleKernel.h" + +namespace arm_compute +{ +using INESimpleKernel = ICPPSimpleKernel; +} +#endif /*__ARM_COMPUTE_INESIMPLEKERNEL_H__ */ diff --git a/arm_compute/core/NEON/NEColorConvertHelper.inl b/arm_compute/core/NEON/NEColorConvertHelper.inl new file mode 100644 index 0000000000..9be7c8a658 --- /dev/null +++ b/arm_compute/core/NEON/NEColorConvertHelper.inl @@ -0,0 +1,888 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/IMultiImage.h" +#include "arm_compute/core/Utils.h" + +#include + +namespace +{ +constexpr float red_coef_bt709 = 1.5748F; +constexpr float green_coef_bt709 = -0.1873f; +constexpr float green_coef2_bt709 = -0.4681f; +constexpr float blue_coef_bt709 = 1.8556f; + +constexpr float rgb2yuv_bt709_kr = 0.2126f; +constexpr float rgb2yuv_bt709_kb = 0.0722f; +// K_g = 1 - K_r - K_b +constexpr float rgb2yuv_bt709_kg = 0.7152f; +// C_u = 1 / (2 * (1 - K_b)) +constexpr float rgb2yuv_bt709_cu = 0.5389f; +// C_v = 1 / (2 * (1 - K_r)) +constexpr float rgb2yuv_bt709_cv = 0.6350f; + +inline void convert_uint8x16_to_float32x4x4(const uint8x16_t &in, float32x4x4_t &out) +{ + const auto tmp1 = vmovl_u8(vget_low_u8(in)); + out.val[0] = vcvtq_f32_u32(vmovl_u16(vget_low_u16(tmp1))); + out.val[1] = vcvtq_f32_u32(vmovl_u16(vget_high_u16(tmp1))); + const auto tmp2 = vmovl_u8(vget_high_u8(in)); + out.val[2] = vcvtq_f32_u32(vmovl_u16(vget_low_u16(tmp2))); + out.val[3] = vcvtq_f32_u32(vmovl_u16(vget_high_u16(tmp2))); +} + +inline void convert_float32x4x3_to_uint8x8x3(const float32x4x3_t &in1, const float32x4x3_t &in2, uint8x8x3_t &out) +{ + out.val[0] = vqmovn_u16(vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in1.val[0])), + vqmovn_u32(vcvtq_u32_f32(in2.val[0])))); + out.val[1] = vqmovn_u16(vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in1.val[1])), + vqmovn_u32(vcvtq_u32_f32(in2.val[1])))); + out.val[2] = vqmovn_u16(vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in1.val[2])), + vqmovn_u32(vcvtq_u32_f32(in2.val[2])))); +} + +inline void convert_float32x4x4_to_unit8x16(const float32x4x4_t &in, uint8x16_t &out) +{ + const auto low = vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in.val[0])), + vqmovn_u32(vcvtq_u32_f32(in.val[1]))); + const auto high = vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in.val[2])), + vqmovn_u32(vcvtq_u32_f32(in.val[3]))); + out = vcombine_u8(vqmovn_u16(low), vqmovn_u16(high)); +} + +inline void 
rgb_to_yuv_calculation(const float32x4_t &rvec, const float32x4_t &gvec, const float32x4_t &bvec, + float32x4_t &yvec, float32x4_t &uvec, float32x4_t &vvec) +{ + /* + Y'= 0.2126*R' + 0.7152*G' + 0.0722*B' + U'=-0.1146*R' - 0.3854*G' + 0.5000*B' + V'= 0.5000*R' - 0.4542*G' - 0.0458*B' + */ + const auto c128 = vdupq_n_f32(128.f); + + // Y = R * K_r + G * (1 - K_r - K_b) * B * K_b + yvec = vmulq_n_f32(rvec, rgb2yuv_bt709_kr); + yvec = vmlaq_n_f32(yvec, gvec, rgb2yuv_bt709_kg); + yvec = vmlaq_n_f32(yvec, bvec, rgb2yuv_bt709_kb); + + // U = (B - Y) / (2 * (1 - K_b)) + uvec = vsubq_f32(bvec, yvec); + uvec = vmlaq_n_f32(c128, uvec, rgb2yuv_bt709_cu); + + // V = (R - Y) / (2 * (1 - K_r)) + vvec = vsubq_f32(rvec, yvec); + vvec = vmlaq_n_f32(c128, vvec, rgb2yuv_bt709_cv); +} + +inline void yuyv_to_rgb_calculation(const float32x4_t &yvec_val, float32x4_t uvec_val, const float32x4_t &yyvec_val, + float32x4_t vvec_val, unsigned char *output_ptr, const bool alpha) +{ + float32x4x3_t rgb1, rgb2; + + // Compute: cb - 128 and cr - 128; + const auto c128 = vdupq_n_f32(128.f); + uvec_val = vsubq_f32(uvec_val, c128); + vvec_val = vsubq_f32(vvec_val, c128); + + // Compute: + // r = 0.0000f*f_u + 1.5748f*f_v; + // g = 0.1873f*f_u - 0.4681f*f_v; + // b = 1.8556f*f_u + 0.0000f*f_v; + const auto red = vmulq_n_f32(vvec_val, red_coef_bt709); + const auto blue = vmulq_n_f32(uvec_val, blue_coef_bt709); + const auto green = vaddq_f32(vmulq_n_f32(uvec_val, green_coef_bt709), + vmulq_n_f32(vvec_val, green_coef2_bt709)); + + // Compute the final r,g,b values using y1 for the first texel and y2 for the second one. 
+ // the result is stored in two float32x4x3_t which then are converted to one uint8x8x3_t + // and written back to memory using vst3 instruction + + rgb1.val[0] = vaddq_f32(yvec_val, red); + rgb1.val[1] = vaddq_f32(yvec_val, green); + rgb1.val[2] = vaddq_f32(yvec_val, blue); + + rgb2.val[0] = vaddq_f32(yyvec_val, red); + rgb2.val[1] = vaddq_f32(yyvec_val, green); + rgb2.val[2] = vaddq_f32(yyvec_val, blue); + + uint8x8x3_t u8_rgb; + convert_float32x4x3_to_uint8x8x3(rgb1, rgb2, u8_rgb); + + if(!alpha) + { + vst3_lane_u8(&output_ptr[0], u8_rgb, 0); + vst3_lane_u8(&output_ptr[3], u8_rgb, 4); + vst3_lane_u8(&output_ptr[6], u8_rgb, 1); + vst3_lane_u8(&output_ptr[9], u8_rgb, 5); + vst3_lane_u8(&output_ptr[12], u8_rgb, 2); + vst3_lane_u8(&output_ptr[15], u8_rgb, 6); + vst3_lane_u8(&output_ptr[18], u8_rgb, 3); + vst3_lane_u8(&output_ptr[21], u8_rgb, 7); + } + else + { + uint8x8x4_t u8_rgba; + u8_rgba.val[0] = u8_rgb.val[0]; + u8_rgba.val[1] = u8_rgb.val[1]; + u8_rgba.val[2] = u8_rgb.val[2]; + u8_rgba.val[3] = vdup_n_u8(255); + vst4_lane_u8(&output_ptr[0], u8_rgba, 0); + vst4_lane_u8(&output_ptr[4], u8_rgba, 4); + vst4_lane_u8(&output_ptr[8], u8_rgba, 1); + vst4_lane_u8(&output_ptr[12], u8_rgba, 5); + vst4_lane_u8(&output_ptr[16], u8_rgba, 2); + vst4_lane_u8(&output_ptr[20], u8_rgba, 6); + vst4_lane_u8(&output_ptr[24], u8_rgba, 3); + vst4_lane_u8(&output_ptr[28], u8_rgba, 7); + } +} + +inline uint8x16x3_t load_rgb(const unsigned char *const ptr, const bool alpha) +{ + uint8x16x3_t rgb; + + if(alpha) + { + const auto tmp = vld4q_u8(ptr); + rgb.val[0] = tmp.val[0]; + rgb.val[1] = tmp.val[1]; + rgb.val[2] = tmp.val[2]; + } + else + { + rgb = vld3q_u8(ptr); + } + + return rgb; +} + +inline void rgb_to_yuv_conversion(uint8x16x3_t &vec_top, uint8x16x3_t &vec_bottom) +{ + // Convert the uint8x16_t to float32x4x4_t + float32x4x4_t frvec_top, fgvec_top, fbvec_top; + convert_uint8x16_to_float32x4x4(vec_top.val[0], frvec_top); + convert_uint8x16_to_float32x4x4(vec_top.val[1], 
fgvec_top); + convert_uint8x16_to_float32x4x4(vec_top.val[2], fbvec_top); + + float32x4x4_t frvec_bottom, fgvec_bottom, fbvec_bottom; + convert_uint8x16_to_float32x4x4(vec_bottom.val[0], frvec_bottom); + convert_uint8x16_to_float32x4x4(vec_bottom.val[1], fgvec_bottom); + convert_uint8x16_to_float32x4x4(vec_bottom.val[2], fbvec_bottom); + + float32x4x4_t fyvec_top, fuvec_top, fvvec_top; + float32x4x4_t fyvec_bottom, fuvec_bottom, fvvec_bottom; + + for(auto i = 0; i < 4; ++i) + { + rgb_to_yuv_calculation(frvec_top.val[i], fgvec_top.val[i], fbvec_top.val[i], + fyvec_top.val[i], fuvec_top.val[i], fvvec_top.val[i]); + rgb_to_yuv_calculation(frvec_bottom.val[i], fgvec_bottom.val[i], fbvec_bottom.val[i], + fyvec_bottom.val[i], fuvec_bottom.val[i], fvvec_bottom.val[i]); + } + + convert_float32x4x4_to_unit8x16(fyvec_top, vec_top.val[0]); + convert_float32x4x4_to_unit8x16(fuvec_top, vec_top.val[1]); + convert_float32x4x4_to_unit8x16(fvvec_top, vec_top.val[2]); + convert_float32x4x4_to_unit8x16(fyvec_bottom, vec_bottom.val[0]); + convert_float32x4x4_to_unit8x16(fuvec_bottom, vec_bottom.val[1]); + convert_float32x4x4_to_unit8x16(fvvec_bottom, vec_bottom.val[2]); +} + +inline void store_rgb_to_nv12(const uint8x16_t &rvec_top, const uint8x16_t &gvec_top, const uint8x16_t &bvec_top, + const uint8x16_t &rvec_bottom, const uint8x16_t &gvec_bottom, const uint8x16_t &bvec_bottom, + unsigned char *const __restrict out_y_top, unsigned char *const __restrict out_y_bottom, + unsigned char *const __restrict out_uv) +{ + uint8x16x3_t vec_top, vec_bottom; + vec_top.val[0] = rvec_top; + vec_top.val[1] = gvec_top; + vec_top.val[2] = bvec_top; + vec_bottom.val[0] = rvec_bottom; + vec_bottom.val[1] = gvec_bottom; + vec_bottom.val[2] = bvec_bottom; + + rgb_to_yuv_conversion(vec_top, vec_bottom); + + vst1q_u8(out_y_top, vec_top.val[0]); + vst1q_u8(out_y_bottom, vec_bottom.val[0]); + + const auto uvec = vuzpq_u8(vec_top.val[1], vec_bottom.val[1]); + const auto vvec = vuzpq_u8(vec_top.val[2], 
vec_bottom.val[2]); + const auto utmp = vrhaddq_u8(uvec.val[0], uvec.val[1]); + const auto vtmp = vrhaddq_u8(vvec.val[0], vvec.val[1]); + + uint8x8x2_t uvvec; + uvvec.val[0] = vhadd_u8(vget_low_u8(utmp), vget_high_u8(utmp)); + uvvec.val[1] = vhadd_u8(vget_low_u8(vtmp), vget_high_u8(vtmp)); + + vst2_u8(out_uv, uvvec); +} + +inline void store_rgb_to_iyuv(const uint8x16_t &rvec_top, const uint8x16_t &gvec_top, const uint8x16_t &bvec_top, + const uint8x16_t &rvec_bottom, const uint8x16_t &gvec_bottom, const uint8x16_t &bvec_bottom, + unsigned char *const __restrict out_y_top, unsigned char *const __restrict out_y_bottom, + unsigned char *const __restrict out_u, + unsigned char *const __restrict out_v) +{ + uint8x16x3_t vec_top, vec_bottom; + vec_top.val[0] = rvec_top; + vec_top.val[1] = gvec_top; + vec_top.val[2] = bvec_top; + vec_bottom.val[0] = rvec_bottom; + vec_bottom.val[1] = gvec_bottom; + vec_bottom.val[2] = bvec_bottom; + + rgb_to_yuv_conversion(vec_top, vec_bottom); + + vst1q_u8(out_y_top, vec_top.val[0]); + vst1q_u8(out_y_bottom, vec_bottom.val[0]); + + const auto uvvec_top = vuzpq_u8(vec_top.val[1], vec_top.val[2]); + const auto uvvec_bottom = vuzpq_u8(vec_bottom.val[1], vec_bottom.val[2]); + const auto uvvec = vhaddq_u8(vrhaddq_u8(uvvec_top.val[0], uvvec_top.val[1]), + vrhaddq_u8(uvvec_bottom.val[0], uvvec_bottom.val[1])); + + vst1_u8(out_u, vget_low_u8(uvvec)); + vst1_u8(out_v, vget_high_u8(uvvec)); +} + +inline void store_rgb_to_yuv4(const uint8x16_t &rvec, const uint8x16_t &gvec, const uint8x16_t &bvec, + unsigned char *const __restrict out_y, + unsigned char *const __restrict out_u, + unsigned char *const __restrict out_v) +{ + // Convert the uint8x16_t to float32x4x4_t + float32x4x4_t frvec, fgvec, fbvec; + convert_uint8x16_to_float32x4x4(rvec, frvec); + convert_uint8x16_to_float32x4x4(gvec, fgvec); + convert_uint8x16_to_float32x4x4(bvec, fbvec); + + float32x4x4_t fyvec, fuvec, fvvec; + for(auto i = 0; i < 4; ++i) + { + 
rgb_to_yuv_calculation(frvec.val[i], fgvec.val[i], fbvec.val[i], + fyvec.val[i], fuvec.val[i], fvvec.val[i]); + } + + uint8x16_t yvec, uvec, vvec; + convert_float32x4x4_to_unit8x16(fyvec, yvec); + convert_float32x4x4_to_unit8x16(fuvec, uvec); + convert_float32x4x4_to_unit8x16(fvvec, vvec); + + vst1q_u8(out_y, yvec); + vst1q_u8(out_u, uvec); + vst1q_u8(out_v, vvec); +} +} + +namespace arm_compute +{ +void colorconvert_rgb_to_rgbx(const void *__restrict input, void *__restrict output, const Window &win) +{ + ARM_COMPUTE_ERROR_ON(nullptr == input); + ARM_COMPUTE_ERROR_ON(nullptr == output); + + const auto input_ptr = static_cast(input); + const auto output_ptr = static_cast(output); + + Iterator in(input_ptr, win); + Iterator out(output_ptr, win); + + execute_window_loop(win, [&](const Coordinates & id) + { + const auto ta1 = vld3q_u8(in.ptr()); + uint8x16x4_t ta2; + ta2.val[0] = ta1.val[0]; + ta2.val[1] = ta1.val[1]; + ta2.val[2] = ta1.val[2]; + ta2.val[3] = vdupq_n_u8(255); + vst4q_u8(out.ptr(), ta2); + }, + in, out); +} + +void colorconvert_rgbx_to_rgb(const void *input, void *output, const Window &win) +{ + ARM_COMPUTE_ERROR_ON(nullptr == input); + ARM_COMPUTE_ERROR_ON(nullptr == output); + + const auto input_ptr = static_cast(input); + const auto output_ptr = static_cast(output); + + Iterator in(input_ptr, win); + Iterator out(output_ptr, win); + + execute_window_loop(win, [&](const Coordinates & id) + { + const auto ta1 = vld4q_u8(in.ptr()); + uint8x16x3_t ta2; + ta2.val[0] = ta1.val[0]; + ta2.val[1] = ta1.val[1]; + ta2.val[2] = ta1.val[2]; + vst3q_u8(out.ptr(), ta2); + }, + in, out); +} + +template +void colorconvert_yuyv_to_rgb(const void *__restrict input, void *__restrict output, const Window &win) +{ + ARM_COMPUTE_ERROR_ON(nullptr == input); + ARM_COMPUTE_ERROR_ON(nullptr == output); + + const auto input_ptr = static_cast(input); + const auto output_ptr = static_cast(output); + + constexpr auto element_size = alpha ? 32 : 24; + constexpr auto shift = yuyv ? 
0 : 1; + + Iterator in(input_ptr, win); + Iterator out(output_ptr, win); + + execute_window_loop(win, [&](const Coordinates & id) + { + float32x4x4_t uvec, yvec, vvec, yyvec; + const auto ta = vld4q_u8(in.ptr()); + //ta.val[0] = Y0 Y2 Y4 Y6 ... + //ta.val[1] = U0 U2 U4 U6 ... + //ta.val[2] = Y1 Y3 Y5 Y7 ... + //ta.val[3] = V0 V2 V4 V7 ... + + // Convert the uint8x16x4_t to float32x4x4_t + convert_uint8x16_to_float32x4x4(ta.val[0 + shift], yvec); + convert_uint8x16_to_float32x4x4(ta.val[1 - shift], uvec); + convert_uint8x16_to_float32x4x4(ta.val[2 + shift], yyvec); + convert_uint8x16_to_float32x4x4(ta.val[3 - shift], vvec); + + yuyv_to_rgb_calculation(yvec.val[0], uvec.val[0], yyvec.val[0], vvec.val[0], out.ptr() + 0 * element_size, alpha); + yuyv_to_rgb_calculation(yvec.val[1], uvec.val[1], yyvec.val[1], vvec.val[1], out.ptr() + 1 * element_size, alpha); + yuyv_to_rgb_calculation(yvec.val[2], uvec.val[2], yyvec.val[2], vvec.val[2], out.ptr() + 2 * element_size, alpha); + yuyv_to_rgb_calculation(yvec.val[3], uvec.val[3], yyvec.val[3], vvec.val[3], out.ptr() + 3 * element_size, alpha); + }, + in, out); +} + +template +void colorconvert_nv12_to_rgb(const void *__restrict input, void *__restrict output, const Window &win) +{ + ARM_COMPUTE_ERROR_ON(nullptr == input); + ARM_COMPUTE_ERROR_ON(nullptr == output); + win.validate(); + + const auto input_ptr = static_cast(input); + const auto output_ptr = static_cast(output); + + constexpr auto element_size = alpha ? 32 : 24; + const auto out_stride = output_ptr->info()->strides_in_bytes().y(); + constexpr auto shift = uv ? 
0 : 1; + + // UV's width and height are subsampled + Window win_uv(win); + win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win.x().step() / 2)); + win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); + win_uv.validate(); + + Iterator in_y(input_ptr->plane(0), win); + Iterator in_uv(input_ptr->plane(1), win_uv); + Iterator out(output_ptr, win); + + execute_window_loop(win, [&](const Coordinates & id) + { + const auto ta_y_top = vld2q_u8(in_y.ptr()); + const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y()); + const auto ta_uv = vld2q_u8(in_uv.ptr()); + //ta_y.val[0] = Y0 Y2 Y4 Y6 ... + //ta_y.val[1] = Y1 Y3 Y5 Y7 ... + //ta_uv.val[0] = U0 U2 U4 U6 ... + //ta_uv.val[1] = V0 V2 V4 V6 ... + + // Convert the uint8x16x4_t to float32x4x4_t + float32x4x4_t yvec_top, yyvec_top, yvec_bottom, yyvec_bottom, uvec, vvec; + convert_uint8x16_to_float32x4x4(ta_y_top.val[0], yvec_top); + convert_uint8x16_to_float32x4x4(ta_y_top.val[1], yyvec_top); + convert_uint8x16_to_float32x4x4(ta_y_bottom.val[0], yvec_bottom); + convert_uint8x16_to_float32x4x4(ta_y_bottom.val[1], yyvec_bottom); + convert_uint8x16_to_float32x4x4(ta_uv.val[0 + shift], uvec); + convert_uint8x16_to_float32x4x4(ta_uv.val[1 - shift], vvec); + + yuyv_to_rgb_calculation(yvec_top.val[0], uvec.val[0], yyvec_top.val[0], vvec.val[0], out.ptr() + 0 * element_size, alpha); + yuyv_to_rgb_calculation(yvec_top.val[1], uvec.val[1], yyvec_top.val[1], vvec.val[1], out.ptr() + 1 * element_size, alpha); + yuyv_to_rgb_calculation(yvec_top.val[2], uvec.val[2], yyvec_top.val[2], vvec.val[2], out.ptr() + 2 * element_size, alpha); + yuyv_to_rgb_calculation(yvec_top.val[3], uvec.val[3], yyvec_top.val[3], vvec.val[3], out.ptr() + 3 * element_size, alpha); + + yuyv_to_rgb_calculation(yvec_bottom.val[0], uvec.val[0], yyvec_bottom.val[0], vvec.val[0], out.ptr() + out_stride + 0 * element_size, alpha); + 
yuyv_to_rgb_calculation(yvec_bottom.val[1], uvec.val[1], yyvec_bottom.val[1], vvec.val[1], out.ptr() + out_stride + 1 * element_size, alpha); + yuyv_to_rgb_calculation(yvec_bottom.val[2], uvec.val[2], yyvec_bottom.val[2], vvec.val[2], out.ptr() + out_stride + 2 * element_size, alpha); + yuyv_to_rgb_calculation(yvec_bottom.val[3], uvec.val[3], yyvec_bottom.val[3], vvec.val[3], out.ptr() + out_stride + 3 * element_size, alpha); + }, + in_y, in_uv, out); +} + +template +void colorconvert_iyuv_to_rgb(const void *__restrict input, void *__restrict output, const Window &win) +{ + ARM_COMPUTE_ERROR_ON(nullptr == input); + ARM_COMPUTE_ERROR_ON(nullptr == output); + win.validate(); + + const auto input_ptr = static_cast(input); + const auto output_ptr = static_cast(output); + + constexpr auto element_size = alpha ? 32 : 24; + const auto out_stride = output_ptr->info()->strides_in_bytes().y(); + + // UV's width and height are subsampled + Window win_uv(win); + win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2)); + win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); + win_uv.validate(); + + Iterator in_y(input_ptr->plane(0), win); + Iterator in_u(input_ptr->plane(1), win_uv); + Iterator in_v(input_ptr->plane(2), win_uv); + Iterator out(output_ptr, win); + + execute_window_loop(win, [&](const Coordinates & id) + { + const auto ta_y_top = vld2q_u8(in_y.ptr()); + const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y()); + const auto ta_u = vld1q_u8(in_u.ptr()); + const auto ta_v = vld1q_u8(in_v.ptr()); + //ta_y.val[0] = Y0 Y2 Y4 Y6 ... + //ta_y.val[1] = Y1 Y3 Y5 Y7 ... + //ta_u.val[0] = U0 U2 U4 U6 ... + //ta_v.val[0] = V0 V2 V4 V6 ... 
+ + // Convert the uint8x16x4_t to float32x4x4_t + float32x4x4_t yvec_top, yyvec_top, yvec_bottom, yyvec_bottom, uvec, vvec; + convert_uint8x16_to_float32x4x4(ta_y_top.val[0], yvec_top); + convert_uint8x16_to_float32x4x4(ta_y_top.val[1], yyvec_top); + convert_uint8x16_to_float32x4x4(ta_y_bottom.val[0], yvec_bottom); + convert_uint8x16_to_float32x4x4(ta_y_bottom.val[1], yyvec_bottom); + convert_uint8x16_to_float32x4x4(ta_u, uvec); + convert_uint8x16_to_float32x4x4(ta_v, vvec); + + yuyv_to_rgb_calculation(yvec_top.val[0], uvec.val[0], yyvec_top.val[0], vvec.val[0], out.ptr() + 0 * element_size, alpha); + yuyv_to_rgb_calculation(yvec_top.val[1], uvec.val[1], yyvec_top.val[1], vvec.val[1], out.ptr() + 1 * element_size, alpha); + yuyv_to_rgb_calculation(yvec_top.val[2], uvec.val[2], yyvec_top.val[2], vvec.val[2], out.ptr() + 2 * element_size, alpha); + yuyv_to_rgb_calculation(yvec_top.val[3], uvec.val[3], yyvec_top.val[3], vvec.val[3], out.ptr() + 3 * element_size, alpha); + + yuyv_to_rgb_calculation(yvec_bottom.val[0], uvec.val[0], yyvec_bottom.val[0], vvec.val[0], out.ptr() + out_stride + 0 * element_size, alpha); + yuyv_to_rgb_calculation(yvec_bottom.val[1], uvec.val[1], yyvec_bottom.val[1], vvec.val[1], out.ptr() + out_stride + 1 * element_size, alpha); + yuyv_to_rgb_calculation(yvec_bottom.val[2], uvec.val[2], yyvec_bottom.val[2], vvec.val[2], out.ptr() + out_stride + 2 * element_size, alpha); + yuyv_to_rgb_calculation(yvec_bottom.val[3], uvec.val[3], yyvec_bottom.val[3], vvec.val[3], out.ptr() + out_stride + 3 * element_size, alpha); + }, + in_y, in_u, in_v, out); +} + +template +void colorconvert_yuyv_to_nv12(const void *__restrict input, void *__restrict output, const Window &win) +{ + ARM_COMPUTE_ERROR_ON(nullptr == input); + ARM_COMPUTE_ERROR_ON(nullptr == output); + win.validate(); + + const auto input_ptr = static_cast(input); + const auto output_ptr = static_cast(output); + + constexpr auto shift = yuyv ? 
0 : 1; + + // NV12's UV's width and height are subsampled + Window win_uv(win); + win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2)); + win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); + win_uv.validate(); + + Iterator in(input_ptr, win); + Iterator out_y(output_ptr->plane(0), win); + Iterator out_uv(output_ptr->plane(1), win_uv); + + execute_window_loop(win, [&](const Coordinates & id) + { + const auto ta_top = vld4q_u8(in.ptr()); + const auto ta_bottom = vld4q_u8(in.ptr() + input_ptr->info()->strides_in_bytes().y()); + //ta.val[0] = Y0 Y2 Y4 Y6 ... + //ta.val[1] = U0 U2 U4 U6 ... + //ta.val[2] = Y1 Y3 Y5 Y7 ... + //ta.val[3] = V0 V2 V4 V7 ... + + uint8x16x2_t yvec; + yvec.val[0] = ta_top.val[0 + shift]; + yvec.val[1] = ta_top.val[2 + shift]; + vst2q_u8(out_y.ptr(), yvec); + + uint8x16x2_t yyvec; + yyvec.val[0] = ta_bottom.val[0 + shift]; + yyvec.val[1] = ta_bottom.val[2 + shift]; + vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), yyvec); + + uint8x16x2_t uvvec; + uvvec.val[0] = vhaddq_u8(ta_top.val[1 - shift], ta_bottom.val[1 - shift]); + uvvec.val[1] = vhaddq_u8(ta_top.val[3 - shift], ta_bottom.val[3 - shift]); + vst2q_u8(out_uv.ptr(), uvvec); + }, + in, out_y, out_uv); +} + +void colorconvert_iyuv_to_nv12(const void *__restrict input, void *__restrict output, const Window &win) +{ + ARM_COMPUTE_ERROR_ON(nullptr == input); + ARM_COMPUTE_ERROR_ON(nullptr == output); + win.validate(); + + const auto input_ptr = static_cast(input); + const auto output_ptr = static_cast(output); + + // UV's width and height are subsampled + Window win_uv(win); + win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2)); + win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); + win_uv.validate(); + + Iterator in_y(input_ptr->plane(0), win); + Iterator 
in_u(input_ptr->plane(1), win_uv); + Iterator in_v(input_ptr->plane(2), win_uv); + Iterator out_y(output_ptr->plane(0), win); + Iterator out_uv(output_ptr->plane(1), win_uv); + + execute_window_loop(win, [&](const Coordinates & id) + { + const auto ta_y_top = vld2q_u8(in_y.ptr()); + const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y()); + uint8x16x2_t ta_uv; + ta_uv.val[0] = vld1q_u8(in_u.ptr()); + ta_uv.val[1] = vld1q_u8(in_v.ptr()); + //ta_y.val[0] = Y0 Y2 Y4 Y6 ... + //ta_y.val[1] = Y1 Y3 Y5 Y7 ... + //ta_uv.val[0] = U0 U2 U4 U6 ... + //ta_uv.val[1] = V0 V2 V4 V6 ... + + vst2q_u8(out_y.ptr(), ta_y_top); + vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom); + vst2q_u8(out_uv.ptr(), ta_uv); + }, + in_y, in_u, in_v, out_y, out_uv); +} + +template +void colorconvert_nv12_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win) +{ + ARM_COMPUTE_ERROR_ON(nullptr == input); + ARM_COMPUTE_ERROR_ON(nullptr == output); + win.validate(); + + const auto input_ptr = static_cast(input); + const auto output_ptr = static_cast(output); + + constexpr auto shift = uv ? 
0 : 1; + + // UV's width and height are subsampled + Window win_uv(win); + win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2)); + win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); + win_uv.validate(); + + Iterator in_y(input_ptr->plane(0), win); + Iterator in_uv(input_ptr->plane(1), win_uv); + Iterator out_y(output_ptr->plane(0), win); + Iterator out_u(output_ptr->plane(1), win_uv); + Iterator out_v(output_ptr->plane(2), win_uv); + + execute_window_loop(win, [&](const Coordinates & id) + { + const auto ta_y_top = vld2q_u8(in_y.ptr()); + const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y()); + const auto ta_uv = vld2q_u8(in_uv.ptr()); + //ta_y.val[0] = Y0 Y2 Y4 Y6 ... + //ta_y.val[1] = Y1 Y3 Y5 Y7 ... + //ta_uv.val[0] = U0 U2 U4 U6 ... + //ta_uv.val[1] = V0 V2 V4 V6 ... + + vst2q_u8(out_y.ptr(), ta_y_top); + vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom); + vst1q_u8(out_u.ptr(), ta_uv.val[0 + shift]); + vst1q_u8(out_v.ptr(), ta_uv.val[1 - shift]); + }, + in_y, in_uv, out_y, out_u, out_v); +} + +template +void colorconvert_yuyv_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win) +{ + ARM_COMPUTE_ERROR_ON(nullptr == input); + ARM_COMPUTE_ERROR_ON(nullptr == output); + win.validate(); + + const auto input_ptr = static_cast(input); + const auto output_ptr = static_cast(output); + + constexpr auto shift = yuyv ? 
0 : 1; + + // Destination's UV's width and height are subsampled + Window win_uv(win); + win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2)); + win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); + win_uv.validate(); + + Iterator in(input_ptr, win); + Iterator out_y(output_ptr->plane(0), win); + Iterator out_u(output_ptr->plane(1), win_uv); + Iterator out_v(output_ptr->plane(2), win_uv); + + execute_window_loop(win, [&](const Coordinates & id) + { + const auto ta_top = vld4q_u8(in.ptr()); + const auto ta_bottom = vld4q_u8(in.ptr() + input_ptr->info()->strides_in_bytes().y()); + //ta.val[0] = Y0 Y2 Y4 Y6 ... + //ta.val[1] = U0 U2 U4 U6 ... + //ta.val[2] = Y1 Y3 Y5 Y7 ... + //ta.val[3] = V0 V2 V4 V7 ... + + uint8x16x2_t yvec; + yvec.val[0] = ta_top.val[0 + shift]; + yvec.val[1] = ta_top.val[2 + shift]; + vst2q_u8(out_y.ptr(), yvec); + + uint8x16x2_t yyvec; + yyvec.val[0] = ta_bottom.val[0 + shift]; + yyvec.val[1] = ta_bottom.val[2 + shift]; + vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), yyvec); + + uint8x16_t uvec; + uvec = vhaddq_u8(ta_top.val[1 - shift], ta_bottom.val[1 - shift]); + vst1q_u8(out_u.ptr(), uvec); + + uint8x16_t vvec; + vvec = vhaddq_u8(ta_top.val[3 - shift], ta_bottom.val[3 - shift]); + vst1q_u8(out_v.ptr(), vvec); + }, + in, out_y, out_u, out_v); +} + +template +void colorconvert_nv12_to_yuv4(const void *__restrict input, void *__restrict output, const Window &win) +{ + ARM_COMPUTE_ERROR_ON(nullptr == input); + ARM_COMPUTE_ERROR_ON(nullptr == output); + win.validate(); + + const auto input_ptr = static_cast(input); + const auto output_ptr = static_cast(output); + + constexpr auto shift = uv ? 
0 : 1; + + // UV's width and height are subsampled + Window win_uv(win); + win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2)); + win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); + win_uv.validate(); + + Iterator in_y(input_ptr->plane(0), win); + Iterator in_uv(input_ptr->plane(1), win_uv); + Iterator out_y(output_ptr->plane(0), win); + Iterator out_u(output_ptr->plane(1), win); + Iterator out_v(output_ptr->plane(2), win); + + execute_window_loop(win, [&](const Coordinates & id) + { + const auto ta_y_top = vld2q_u8(in_y.ptr()); + const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y()); + const auto ta_uv = vld2q_u8(in_uv.ptr()); + //ta_y.val[0] = Y0 Y2 Y4 Y6 ... + //ta_y.val[1] = Y1 Y3 Y5 Y7 ... + //ta_uv.val[0] = U0 U2 U4 U6 ... + //ta_uv.val[1] = V0 V2 V4 V6 ... + + vst2q_u8(out_y.ptr(), ta_y_top); + vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom); + + uint8x16x2_t uvec; + uvec.val[0] = ta_uv.val[0 + shift]; + uvec.val[1] = ta_uv.val[0 + shift]; + vst2q_u8(out_u.ptr(), uvec); + vst2q_u8(out_u.ptr() + output_ptr->plane(1)->info()->strides_in_bytes().y(), uvec); + + uint8x16x2_t vvec; + vvec.val[0] = ta_uv.val[1 - shift]; + vvec.val[1] = ta_uv.val[1 - shift]; + vst2q_u8(out_v.ptr(), vvec); + vst2q_u8(out_v.ptr() + output_ptr->plane(2)->info()->strides_in_bytes().y(), vvec); + }, + in_y, in_uv, out_y, out_u, out_v); +} + +void colorconvert_iyuv_to_yuv4(const void *__restrict input, void *__restrict output, const Window &win) +{ + ARM_COMPUTE_ERROR_ON(nullptr == input); + ARM_COMPUTE_ERROR_ON(nullptr == output); + win.validate(); + + const auto input_ptr = static_cast(input); + const auto output_ptr = static_cast(output); + + // UV's width and height are subsampled + Window win_uv(win); + win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, 
win_uv.x().step() / 2)); + win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); + win_uv.validate(); + + Iterator in_y(input_ptr->plane(0), win); + Iterator in_u(input_ptr->plane(1), win_uv); + Iterator in_v(input_ptr->plane(2), win_uv); + Iterator out_y(output_ptr->plane(0), win); + Iterator out_u(output_ptr->plane(1), win); + Iterator out_v(output_ptr->plane(2), win); + + execute_window_loop(win, [&](const Coordinates & id) + { + const auto ta_y_top = vld2q_u8(in_y.ptr()); + const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y()); + const auto ta_u = vld1q_u8(in_u.ptr()); + const auto ta_v = vld1q_u8(in_v.ptr()); + //ta_y.val[0] = Y0 Y2 Y4 Y6 ... + //ta_y.val[1] = Y1 Y3 Y5 Y7 ... + //ta_u = U0 U2 U4 U6 ... + //ta_v = V0 V2 V4 V6 ... + + vst2q_u8(out_y.ptr(), ta_y_top); + vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom); + + uint8x16x2_t uvec; + uvec.val[0] = ta_u; + uvec.val[1] = ta_u; + vst2q_u8(out_u.ptr(), uvec); + vst2q_u8(out_u.ptr() + output_ptr->plane(1)->info()->strides_in_bytes().y(), uvec); + + uint8x16x2_t vvec; + vvec.val[0] = ta_v; + vvec.val[1] = ta_v; + vst2q_u8(out_v.ptr(), vvec); + vst2q_u8(out_v.ptr() + output_ptr->plane(2)->info()->strides_in_bytes().y(), vvec); + }, + in_y, in_u, in_v, out_y, out_u, out_v); +} + +template +void colorconvert_rgb_to_nv12(const void *__restrict input, void *__restrict output, const Window &win) +{ + ARM_COMPUTE_ERROR_ON(nullptr == input); + ARM_COMPUTE_ERROR_ON(nullptr == output); + win.validate(); + + const auto input_ptr = static_cast(input); + const auto output_ptr = static_cast(output); + + // UV's width and height are subsampled + Window win_uv(win); + win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2)); + win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); + win_uv.validate(); + + 
Iterator in(input_ptr, win); + Iterator out_y(output_ptr->plane(0), win); + Iterator out_uv(output_ptr->plane(1), win_uv); + + execute_window_loop(win, [&](const Coordinates & id) + { + const auto ta_rgb_top = load_rgb(in.ptr(), alpha); + const auto ta_rgb_bottom = load_rgb(in.ptr() + input_ptr->info()->strides_in_bytes().y(), alpha); + //ta_rgb.val[0] = R0 R1 R2 R3 ... + //ta_rgb.val[1] = G0 G1 G2 G3 ... + //ta_rgb.val[2] = B0 B1 B2 B3 ... + + store_rgb_to_nv12(ta_rgb_top.val[0], ta_rgb_top.val[1], ta_rgb_top.val[2], + ta_rgb_bottom.val[0], ta_rgb_bottom.val[1], ta_rgb_bottom.val[2], + out_y.ptr(), out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), + out_uv.ptr()); + }, + in, out_y, out_uv); +} + +template +void colorconvert_rgb_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win) +{ + ARM_COMPUTE_ERROR_ON(nullptr == input); + ARM_COMPUTE_ERROR_ON(nullptr == output); + win.validate(); + + const auto input_ptr = static_cast(input); + const auto output_ptr = static_cast(output); + + // UV's width and height are subsampled + Window win_uv(win); + win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2)); + win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); + win_uv.validate(); + + Iterator in(input_ptr, win); + Iterator out_y(output_ptr->plane(0), win); + Iterator out_u(output_ptr->plane(1), win_uv); + Iterator out_v(output_ptr->plane(2), win_uv); + + execute_window_loop(win, [&](const Coordinates & id) + { + const auto ta_rgb_top = load_rgb(in.ptr(), alpha); + const auto ta_rgb_bottom = load_rgb(in.ptr() + input_ptr->info()->strides_in_bytes().y(), alpha); + //ta_rgb.val[0] = R0 R1 R2 R3 ... + //ta_rgb.val[1] = G0 G1 G2 G3 ... + //ta_rgb.val[2] = B0 B1 B2 B3 ... 
+ + store_rgb_to_iyuv(ta_rgb_top.val[0], ta_rgb_top.val[1], ta_rgb_top.val[2], + ta_rgb_bottom.val[0], ta_rgb_bottom.val[1], ta_rgb_bottom.val[2], + out_y.ptr(), out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), + out_u.ptr(), out_v.ptr()); + }, + in, out_y, out_u, out_v); +} + +template +void colorconvert_rgb_to_yuv4(const void *__restrict input, void *__restrict output, const Window &win) +{ + ARM_COMPUTE_ERROR_ON(nullptr == input); + ARM_COMPUTE_ERROR_ON(nullptr == output); + win.validate(); + + const auto input_ptr = static_cast(input); + const auto output_ptr = static_cast(output); + + Iterator in(input_ptr, win); + Iterator out_y(output_ptr->plane(0), win); + Iterator out_u(output_ptr->plane(1), win); + Iterator out_v(output_ptr->plane(2), win); + + execute_window_loop(win, [&](const Coordinates & id) + { + const auto ta_rgb = load_rgb(in.ptr(), alpha); + //ta_rgb.val[0] = R0 R1 R2 R3 ... + //ta_rgb.val[1] = G0 G1 G2 G3 ... + //ta_rgb.val[2] = B0 B1 B2 B3 ... + + store_rgb_to_yuv4(ta_rgb.val[0], ta_rgb.val[1], ta_rgb.val[2], + out_y.ptr(), out_u.ptr(), out_v.ptr()); + }, + in, out_y, out_u, out_v); +} +} diff --git a/arm_compute/core/NEON/NEFixedPoint.h b/arm_compute/core/NEON/NEFixedPoint.h new file mode 100644 index 0000000000..fb712611cb --- /dev/null +++ b/arm_compute/core/NEON/NEFixedPoint.h @@ -0,0 +1,686 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEFIXEDPOINT_H__ +#define __ARM_COMPUTE_NEFIXEDPOINT_H__ + +#include "arm_compute/core/FixedPoint.h" + +#include + +namespace arm_compute +{ +using qint8x8_t = int8x8_t; /**< 8 bit fixed point vector with 8 elements */ +using qint8x8x2_t = int8x8x2_t; /**< 8 bit fixed point vector with 16 elements */ +using qint8x8x3_t = int8x8x3_t; /**< 8 bit fixed point vector with 24 elements */ +using qint8x8x4_t = int8x8x4_t; /**< 8 bit fixed point vector with 32 elements */ +using qint8x16_t = int8x16_t; /**< 8 bit fixed point vector with 16 elements */ +using qint8x16x2_t = int8x16x2_t; /**< 8 bit fixed point vector with 32 elements */ +using qint8x16x3_t = int8x16x3_t; /**< 8 bit fixed point vector with 48 elements */ +using qint8x16x4_t = int8x16x4_t; /**< 8 bit fixed point vector with 64 elements */ +using qint16x4_t = int16x4_t; /**< 16 bit fixed point vector with 4 elements */ +using qint16x4x2_t = int16x4x2_t; /**< 16 bit fixed point vector with 8 elements */ +using qint16x4x3_t = int16x4x3_t; /**< 16 bit fixed point vector with 12 elements */ +using qint16x4x4_t = int16x4x4_t; /**< 16 bit fixed point vector with 16 elements */ +using qint16x8_t = int16x8_t; /**< 16 bit fixed point vector with 8 elements */ +using qint16x8x2_t = int16x8x2_t; /**< 16 bit fixed point vector with 16 elements */ +using qint16x8x3_t = int16x8x3_t; /**< 16 bit fixed point vector with 24 elements */ +using qint16x8x4_t = int16x8x4_t; /**< 16 bit fixed point vector with 32 elements */ + +/** Get the lower half of a 16 elements vector + * + * @param[in] a vector of 16 elements + * + * @return 8 bit fixed point vector (8 elements) + */ +qint8x8_t vget_low_qs8(qint8x16_t a); + +/** Get the higher half of a 16 elements vector + * + * @param[in] a vector of 16 elements + * + * @return 8 bit fixed point vector (8 elements) + */ +qint8x8_t vget_high_qs8(qint8x16_t a); + +/** Load a single 8 bit fixed point vector from memory (8 elements) + * + * @param[in] addr Memory 
address of the 8 bit fixed point vector to load + * + * @return 8 bit fixed point vector (8 elements) + */ +qint8x8_t vld1_qs8(const qint8_t *addr); + +/** Load a single 8 bit fixed point vector from memory (16 elements) + * + * @param[in] addr Memory address of the 8 bit fixed point vector to load + * + * @return 8 bit fixed point vector (16 elements) + */ +qint8x16_t vld1q_qs8(const qint8_t *addr); + +/** Load a single 16 bit fixed point vector from memory (4 elements) + * + * @param[in] addr Memory address of the 16 bit fixed point vector to load + * + * @return 16 bit fixed point vector (4 elements) + */ +qint16x4_t vld1_qs16(const qint16_t *addr); + +/** Load a single 16 bit fixed point vector from memory (8 elements) + * + * @param[in] addr Memory address of the 16 bit fixed point vector to load + * + * @return 16 bit fixed point vector (8 elements) + */ +qint16x8_t vld1q_qs16(const qint16_t *addr); + +/** Load all lanes of 8 bit fixed point vector with same value from memory (8 elements) + * + * @param[in] addr Memory address of the 8 bit fixed point scalar value to load + * + * @return 8 bit fixed point vector (8 elements) + */ +qint8x8_t vld1_dup_qs8(const qint8_t *addr); + +/** Load all lanes of 8 bit fixed point vector with same value from memory (16 elements) + * + * @param[in] addr Memory address of the 8 bit fixed point scalar value to load + * + * @return 8 bit fixed point vector (16 elements) + */ +qint8x16_t vld1q_dup_qs8(const qint8_t *addr); + +/** Store a single 8 bit fixed point vector to memory (8 elements) + * + * @param[in] addr Memory address where the 8 bit fixed point vector should be stored + * @param[in] b 8 bit fixed point vector to store + * + */ +void vst1_qs8(qint8_t *addr, qint8x8_t b); + +/** Store a single 8 bit fixed point vector to memory (16 elements) + * + * @param[in] addr Memory address where the 8 bit fixed point vector should be stored + * @param[in] b 8 bit fixed point vector to store + * + */ +void vst1q_qs8(qint8_t 
*addr, qint8x16_t b); + +/** Store a single 16 bit fixed point vector to memory (4 elements) + * + * @param[in] addr Memory address where the 16 bit fixed point vector should be stored + * @param[in] b 16 bit fixed point vector to store + * + */ +void vst1_qs16(qint16_t *addr, qint16x4_t b); + +/** Store a single 8 bit fixed point vector to memory (16 elements) + * + * @param[in] addr Memory address where the 16 bit fixed point vector should be stored + * @param[in] b 16 bit fixed point vector to store + * + */ +void vst1q_qs16(qint16_t *addr, qint16x8_t b); + +/** 16 bit fixed point vector saturating narrow (8 elements) + * + * @param[in] a 16 bit fixed point vector to convert + * + * @return 8 bit fixed point vector + */ +qint8x8_t vqmovn_q16(qint16x8_t a); + +/** 8 bit fixed point vector duplicate (8 elements) + * + * @param[in] a 8 bit fixed point to duplicate + * + * @return The result of the vector duplication + */ +qint8x8_t vdup_n_qs8(qint8_t a); + +/** 8 bit fixed point vector duplicate (16 elements) + * + * @param[in] a 8 bit fixed point to duplicate + * + * @return The result of the vector duplication + */ +qint8x16_t vdupq_n_qs8(qint8_t a); + +/** Duplicate a float and convert it to 8 bit fixed point vector (16 elements) + * + * @param[in] a 8 bit fixed point to duplicate + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the vector duplication + */ +qint8x16_t vdupq_n_qs8_f32(float a, int fixed_point_position); + +/** 16 bit fixed point vector duplicate (8 elements) + * + * @param[in] a 16 bit fixed point to duplicate + * + * @return The result of the vector duplication + */ +qint16x8_t vdupq_n_qs16(qint16x8_t a); + +/** Absolute value of 8 bit fixed point vector (8 elements) + * + * @param[in] a 8 bit fixed point input vector + * + * @return The result of the 8 bit fixed point vector absolute value + */ +qint8x8_t vabs_qs8(qint8x8_t a); + +/** 
Absolute value of 8 bit fixed point vector (16 elements) + * + * @param[in] a 8 bit fixed point input vector + * + * @return The result of the 8 bit fixed point vector absolute value + */ +qint8x16_t vabsq_qs8(qint8x16_t a); + +/** Saturating absolute value of 8 bit fixed point vector (8 elements) + * + * @param[in] a 8 bit fixed point input vector + * + * @return The result of the 8 bit fixed point vector absolute value + */ +qint8x8_t vqabs_qs8(qint8x8_t a); + +/** Saturating absolute value of 8 bit fixed point vector (16 elements) + * + * @param[in] a 8 bit fixed point input vector + * + * @return The result of the 8 bit fixed point vector absolute value + */ +qint8x16_t vqabsq_qs8(qint8x16_t a); + +/** 8 bit fixed point vector max (8 elements) + * + * @param[in] a First 8 bit fixed point input vector + * @param[in] b Second 8 bit fixed point input vector + * + * @return The result of the 8 bit fixed point vector max operation + */ +qint8x8_t vmax_qs8(qint8x8_t a, qint8x8_t b); + +/** 8 bit fixed point vector max (16 elements) + * + * @param[in] a First 8 bit fixed point input vector + * @param[in] b Second 8 bit fixed point input vector + * + * @return The result of the 8 bit fixed point vector max operation + */ +qint8x16_t vmaxq_qs8(qint8x16_t a, qint8x16_t b); + +/** 8 bit fixed point vector pairwise max (8 elements) + * + * @param[in] a First 8 bit fixed point input vector + * @param[in] b Second 8 bit fixed point input vector + * + * @return The result of the 8 bit fixed point vector pairwise max operation + */ +qint8x8_t vpmax_qs8(qint8x8_t a, qint8x8_t b); + +/** 8 bit fixed point vector min (8 elements) + * + * @param[in] a First 8 bit fixed point input vector + * @param[in] b Second 8 bit fixed point input vector + * + * @return The result of the 8 bit fixed point vector max operation + */ +qint8x8_t vmin_qs8(qint8x8_t a, qint8x8_t b); + +/** 8 bit fixed point vector min (16 elements) + * + * @param[in] a First 8 bit fixed point input vector + * 
@param[in] b Second 8 bit fixed point input vector + * + * @return The result of the 8 bit fixed point vector min operation + */ +qint8x16_t vminq_qs8(qint8x16_t a, qint8x16_t b); + +/** 8 bit fixed point vector pairwise min (8 elements) + * + * @param[in] a First 8 bit fixed point input vector + * @param[in] b Second 8 bit fixed point input vector + * + * @return The result of the 8 bit fixed point vector pairwise min operation + */ +qint8x8_t vpmin_qs8(qint8x8_t a, qint8x8_t b); + +/** 8 bit fixed point vector add (8 elements) + * + * @param[in] a First 8 bit fixed point input vector + * @param[in] b Second 8 bit fixed point input vector + * + * @return The result of the 8 bit fixed point vector addition + */ +qint8x8_t vadd_qs8(qint8x8_t a, qint8x8_t b); + +/** 8 bit fixed point vector add (16 elements) + * + * @param[in] a First 8 bit fixed point input vector + * @param[in] b Second 8 bit fixed point input vector + * + * @return The result of the 8 bit fixed point vector addition + */ +qint8x16_t vaddq_qs8(qint8x16_t a, qint8x16_t b); + +/** 8 bit fixed point vector saturating add (8 elements) + * + * @param[in] a First 8 bit fixed point input vector + * @param[in] b Second 8 bit fixed point input vector + * + * @return The result of the 8 bit fixed point vector addition. The result is saturated in case of overflow + */ +qint8x8_t vqadd_qs8(qint8x8_t a, qint8x8_t b); + +/** 8 bit fixed point vector saturating add (16 elements) + * + * @param[in] a First 8 bit fixed point input vector + * @param[in] b Second 8 bit fixed point input vector + * + * @return The result of the 8 bit fixed point vector addition. The result is saturated in case of overflow + */ +qint8x16_t vqaddq_qs8(qint8x16_t a, qint8x16_t b); + +/** 16 bit fixed point vector saturating add (4 elements) + * + * @param[in] a First 16 bit fixed point input vector + * @param[in] b Second 16 bit fixed point input vector + * + * @return The result of the 16 bit fixed point vector addition. 
The result is saturated in case of overflow
+ */
+qint16x4_t vqadd_qs16(qint16x4_t a, qint16x4_t b);
+
+/** 16 bit fixed point vector saturating add (8 elements)
+ *
+ * @param[in] a First 16 bit fixed point input vector
+ * @param[in] b Second 16 bit fixed point input vector
+ *
+ * @return The result of the 16 bit fixed point vector addition. The result is saturated in case of overflow
+ */
+qint16x8_t vqaddq_qs16(qint16x8_t a, qint16x8_t b);
+
+/** 8 bit fixed point vector pairwise add long (8 elements)
+ *
+ * @param[in] a 8 bit fixed point input vector
+ *
+ * @return The result of the pairwise addition, widened to a 16 bit fixed point vector. No saturation can occur
+ */
+int16x4_t vpaddl_qs8(qint8x8_t a);
+
+/** 8 bit fixed point vector subtraction (8 elements)
+ *
+ * @param[in] a First 8 bit fixed point input vector
+ * @param[in] b Second 8 bit fixed point input vector
+ *
+ * @return The result of the 8 bit fixed point vector subtraction
+ */
+qint8x8_t vsub_qs8(qint8x8_t a, qint8x8_t b);
+
+/** 8 bit fixed point vector subtraction (16 elements)
+ *
+ * @param[in] a First 8 bit fixed point input vector
+ * @param[in] b Second 8 bit fixed point input vector
+ *
+ * @return The result of the 8 bit fixed point vector subtraction
+ */
+qint8x16_t vsubq_qs8(qint8x16_t a, qint8x16_t b);
+
+/** 8 bit fixed point vector saturating subtraction (8 elements)
+ *
+ * @param[in] a First 8 bit fixed point input vector
+ * @param[in] b Second 8 bit fixed point input vector
+ *
+ * @return The result of the 8 bit fixed point vector subtraction. The result is saturated in case of overflow
+ */
+qint8x8_t vqsub_qs8(qint8x8_t a, qint8x8_t b);
+
+/** 8 bit fixed point vector saturating subtraction (16 elements)
+ *
+ * @param[in] a First 8 bit fixed point input vector
+ * @param[in] b Second 8 bit fixed point input vector
+ *
+ * @return The result of the 8 bit fixed point vector subtraction.
The result is saturated in case of overflow + */ +qint8x16_t vqsubq_qs8(qint8x16_t a, qint8x16_t b); + +/** 8 bit fixed point vector multiply (8 elements) + * + * @param[in] a First 8 bit fixed point input vector + * @param[in] b Second 8 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8 bit fixed point vector multiplication. + */ +qint8x8_t vmul_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position); + +/** 8 bit fixed point vector multiply (16 elements) + * + * @param[in] a First 8 bit fixed point input vector + * @param[in] b Second 8 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8 bit fixed point vector multiplication. + */ +qint8x16_t vmulq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position); + +/** 8 bit fixed point vector saturating multiply (8 elements) + * + * @param[in] a First 8 bit fixed point input vector + * @param[in] b Second 8 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8 bit fixed point vector multiplication. The result is saturated in case of overflow + */ +qint8x8_t vqmul_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position); + +/** 8 bit fixed point vector saturating multiply (16 elements) + * + * @param[in] a First 8 bit fixed point input vector + * @param[in] b Second 8 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8 bit fixed point vector multiplication. 
The result is saturated in case of overflow + */ +qint8x16_t vqmulq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position); + +/** 8 bit fixed point vector long multiply (8 elements) + * + * @param[in] a First 8 bit fixed point input vector + * @param[in] b Second 8 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8 bit fixed point long vector multiplication. + */ +qint16x8_t vmull_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position); + +/** 8 bit fixed point vector multiply-accumulate (8 elements). This operation performs the product between @p b and @p c and add the result to @p a (a + b * c). + * + * @param[in] a First 8 bit fixed point input vector where the result of multiplication must be added to + * @param[in] b Second 8 bit fixed point input vector + * @param[in] c Third 8 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8 bit fixed point vector multiply-accumulate + */ +qint8x8_t vmla_qs8(qint8x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position); + +/** 8 bit fixed point vector multiply-accumulate (16 elements). This operation performs the product between @p b and @p c and add the result to @p a (a + b * c). 
+ * + * @param[in] a First 8 bit fixed point input vector where the result of multiplication must be added to + * @param[in] b Second 8 bit fixed point input vector + * @param[in] c Third 8 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8 bit fixed point vector multiply-accumulate + */ +qint8x16_t vmlaq_qs8(qint8x16_t a, qint8x16_t b, qint8x16_t c, int fixed_point_position); + +/** 8 bit fixed point vector saturating multiply-accumulate (8 elements). This operation performs the product between @p b and @p c and add the result to @p a (a + b * c). + * + * @param[in] a First 8 bit fixed point input vector where the result of multiplication must be added to + * @param[in] b Second 8 bit fixed point input vector + * @param[in] c Third 8 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8 bit fixed point vector multiply-accumulate. The result is saturated in case of overflow + */ +qint8x8_t vqmla_qs8(qint8x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position); + +/** 8 bit fixed point vector saturating multiply-accumulate (16 elements). This operation performs the product between @p b and @p c and add the result to @p a (a + b * c). 
+ * + * @param[in] a First 8 bit fixed point input vector where the result of multiplication must be added to + * @param[in] b Second 8 bit fixed point input vector + * @param[in] c Third 8 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8 bit fixed point vector multiply-accumulate.The result is saturated in case of overflow + */ +qint8x16_t vqmlaq_qs8(qint8x16_t a, qint8x16_t b, qint8x16_t c, int fixed_point_position); + +/** 8 bit fixed point vector multiply-accumulate long (8 elements). + * This operation performs the product between @p b and @p c and add the result to the 16 bit fixed point vector @p a (a + b * c). 8 elements + * + * @param[in] a First 16 bit fixed point input vector where the result of multiplication must be added to + * @param[in] b Second 8 bit fixed point input vector + * @param[in] c Third 8 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8 bit fixed point vector multiply-accumulate long + */ +qint16x8_t vmlal_qs8(qint16x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position); + +/** 8 bit fixed point vector saturating multiply-accumulate long (8 elements). The saturation is performed on the 16 bit fixed point output vector. + * This operation performs the product between @p b and @p c and add the result to the 16 bit fixed point vector @p a (a + b * c). 
8 elements + * + * @param[in] a First 16 bit fixed point input vector where the result of multiplication must be added to + * @param[in] b Second 8 bit fixed point input vector + * @param[in] c Third 8 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the 8 bit fixed point vector multiply-accumulate long + */ +qint16x8_t vqmlal_qs8(qint16x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position); + +/** Convert a float vector with 4x2 elements to 8 bit fixed point vector with 8 elements + * + * @param[in] a Float input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the conversion float -> 8 bit fixed point + */ +qint8x8_t vcvt_qs8_f32(const float32x4x2_t &a, int fixed_point_position); + +/** Convert a float vector with 4x4 elements to 8 bit fixed point vector with 16 elements + * + * @param[in] a Float input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the conversion float -> 8 bit fixed point + */ +qint8x16_t vcvtq_qs8_f32(const float32x4x4_t &a, int fixed_point_position); + +/** Convert a 8 bit fixed point vector with 8 elements to a float vector with 4x2 elements + * + * @param[in] a 8 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number + * + * @return The result of the conversion 8 bit fixed point -> float32x2x4 + */ +float32x4x2_t vcvt_f32_qs8(qint8x8_t a, int fixed_point_position); + +/** Convert a 8 bit fixed point vector with 16 elements to a float vector with 4x4 elements + * + * @param[in] a 8 bit fixed point input vector + * @param[in] fixed_point_position Fixed point position 
that expresses the number of bits for the fractional part of the number
+ *
+ * @return The result of the conversion 8 bit fixed point -> float32x4x4
+ */
+float32x4x4_t vcvtq_f32_qs8(qint8x16_t a, int fixed_point_position);
+
+/** Calculate reciprocal of a fixed point 8bit number using the Newton-Raphson method. (8 elements)
+ *
+ * @param[in] a 8bit fixed point input vector
+ * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
+ *
+ * @return The result of the 8bit reciprocal (1/a).
+ */
+qint8x8_t vrecip_qs8(qint8x8_t a, int fixed_point_position);
+
+/** Calculate reciprocal of a fixed point 8bit number using the Newton-Raphson method. (16 elements)
+ *
+ * @param[in] a 8bit fixed point input vector
+ * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
+ *
+ * @return The result of the 8bit reciprocal (1/a).
+ */
+qint8x16_t vrecipq_qs8(qint8x16_t a, int fixed_point_position);
+
+/** Division fixed point 8bit (8 elements)
+ *
+ * @param[in] a First 8bit fixed point input vector
+ * @param[in] b Second 8bit fixed point input vector
+ * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
+ *
+ * @return The quotient in 8bit fixed point format.
+ */
+qint8x8_t vdiv_qs8(qint8x8_t a, int8x8_t b, int fixed_point_position);
+
+/** Division fixed point 8bit (16 elements)
+ *
+ * @param[in] a First 8bit fixed point input vector
+ * @param[in] b Second 8bit fixed point input vector
+ * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
+ *
+ * @return The quotient in 8bit fixed point format.
+ */
+qint8x16_t vdivq_qs8(qint8x16_t a, int8x16_t b, int fixed_point_position);
+
+/** Perform a 4th degree polynomial approximation.
(8 elements)
+ *
+ * @param[in] a 8bit fixed point input vector
+ * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
+ *
+ * @return The result of the 8bit taylor approximation.
+ */
+template
+qint8x8_t vtaylor_poly_qs8(qint8x8_t a, int fixed_point_position);
+
+/** Perform a 4th degree polynomial approximation. (16 elements)
+ *
+ * @param[in] a 8bit fixed point input vector
+ * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
+ *
+ * @return The result of the 8bit taylor approximation.
+ */
+template
+qint8x16_t vtaylor_polyq_qs8(qint8x16_t a, int fixed_point_position);
+
+/** Calculate saturating exponential fixed point 8bit (8 elements)
+ *
+ * @param[in] a 8bit fixed point input vector
+ * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
+ *
+ * @return The result of the 8bit saturating exponential
+ */
+qint8x8_t vqexp_qs8(qint8x8_t a, int fixed_point_position);
+
+/** Calculate saturating exponential fixed point 8bit (16 elements)
+ *
+ * @param[in] a 8bit fixed point input vector
+ * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
+ *
+ * @return The result of the 8bit saturating exponential
+ */
+qint8x16_t vqexpq_qs8(qint8x16_t a, int fixed_point_position);
+
+/** Calculate logarithm fixed point 8bit (8 elements)
+ *
+ * @param[in] a 8bit fixed point input vector
+ * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
+ *
+ * @return The result of the 8bit logarithm.
+ */
+qint8x8_t vlog_qs8(qint8x8_t a, int fixed_point_position);
+
+/** Calculate logarithm fixed point 8bit (16 elements)
+ *
+ * @param[in] a 8bit fixed point input vector
+ * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
+ *
+ * @return The result of the 8bit logarithm.
+ */
+qint8x16_t vlogq_qs8(qint8x16_t a, int fixed_point_position);
+
+/** Calculate inverse square root for fixed point 8bit using Newton-Raphson method (8 elements)
+ *
+ * @param[in] a 8bit fixed point input vector
+ * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
+ *
+ * @return The result of the 8bit inverse sqrt.
+ */
+qint8x8_t vinvsqrt_qs8(qint8x8_t a, int fixed_point_position);
+
+/** Calculate saturating inverse square root for fixed point 8bit using Newton-Raphson method (8 elements)
+ *
+ * @param[in] a 8bit fixed point input vector
+ * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
+ *
+ * @return The result of the 8bit inverse sqrt.
+ */
+qint8x8_t vqinvsqrt_qs8(qint8x8_t a, int fixed_point_position);
+
+/** Calculate inverse square root for fixed point 8bit using Newton-Raphson method (16 elements)
+ *
+ * @param[in] a 8bit fixed point input vector
+ * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
+ *
+ * @return The result of the 8bit inverse sqrt.
+ */
+qint8x16_t vinvsqrtq_qs8(qint8x16_t a, int fixed_point_position);
+
+/** Calculate saturating inverse square root for fixed point 8bit using Newton-Raphson method (16 elements)
+ *
+ * @param[in] a 8bit fixed point input vector
+ * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
+ *
+ * @return The result of the 8bit inverse sqrt.
+ */
+qint8x16_t vqinvsqrtq_qs8(qint8x16_t a, int fixed_point_position);
+
+/** Calculate hyperbolic tangent for fixed point 8bit (8 elements)
+ *
+ * @param[in] a 8bit fixed point input vector
+ * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
+ *
+ * @return The calculated Hyperbolic Tangent.
+ */
+qint8x8_t vtanh_qs8(qint8x8_t a, int fixed_point_position);
+
+/** Calculate hyperbolic tangent for fixed point 8bit (16 elements)
+ *
+ * @param[in] a 8bit fixed point input vector
+ * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
+ *
+ * @return The calculated Hyperbolic Tangent.
+ */
+qint8x16_t vtanhq_qs8(qint8x16_t a, int fixed_point_position);
+
+/** Calculate saturating n power for fixed point 8bit (16 elements).
+ *
+ * pow(a,b) = e^(b*log(a))
+ *
+ * @param[in] a 8bit fixed point input vector
+ * @param[in] b 8bit fixed point power vector
+ * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
+ *
+ * @return The result of the 8bit power.
+ */
+qint8x16_t vqpowq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position);
+}
+#include "arm_compute/core/NEON/NEFixedPoint.inl"
+#endif /* __ARM_COMPUTE_NEFIXEDPOINT_H__ */
diff --git a/arm_compute/core/NEON/NEFixedPoint.inl b/arm_compute/core/NEON/NEFixedPoint.inl
new file mode 100644
index 0000000000..6db344dc11
--- /dev/null
+++ b/arm_compute/core/NEON/NEFixedPoint.inl
@@ -0,0 +1,1018 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +namespace arm_compute +{ +/**< Exponent polynomial coefficients for 8 bit fixed point (8 elements) + * Format is in Q0.7 for all elements */ +const std::array exp_tab_qs8 = +{ + { + vdup_n_s8(0x7F), // 0.9978546 + vdup_n_s8(0x3F), // 0.4994721 + vdup_n_s8(0x16), // 0.1763723 + vdup_n_s8(0x05), // 0.0435108 + } +}; + +/**< Exponent polynomial coefficients for 8 bit fixed point (16 elements) + * Format is in Q0.7 for all elements */ +const std::array exp_tabq_qs8 = +{ + { + vdupq_n_s8(0x7F), // 0.9978546 + vdupq_n_s8(0x3F), // 0.4994721 + vdupq_n_s8(0x16), // 0.1763723 + vdupq_n_s8(0x05), // 0.0435108 + } +}; + +/**< Logarithm polynomial coefficients for 8 bit fixed point (8 elements) + * Format is in Q0.7 for all elements except the first one which is in Q1.6 */ +const std::array log_tab_qs8 = +{ + { + vdup_n_s8(0x5C), // 1.4384189 + vdup_n_s8(-0x56), // -0.6771900 + vdup_n_s8(0x29), // 0.3218538 + vdup_n_s8(-0x0A), // -0.0832229 + } +}; + +/**< Logarithm polynomial coefficients for 8 bit fixed point (16 elements) + * Format is in Q0.7 for all elements except the first one which is in Q1.6 */ +const std::array log_tabq_qs8 = +{ + { + vdupq_n_s8(0x5C), // 1.4384189 + vdupq_n_s8(-0x56), // -0.6771900 + vdupq_n_s8(0x29), // 0.3218538 + vdupq_n_s8(-0x0A), // -0.0832229 + } +}; + +inline qint8x8_t vget_low_qs8(qint8x16_t a) +{ + return vget_low_s8(a); +} + +inline qint8x8_t vget_high_qs8(qint8x16_t a) +{ + return vget_high_s8(a); +} + +inline qint8x8_t vld1_qs8(const qint8_t *addr) +{ + return vld1_s8(addr); +} + +inline qint8x16_t vld1q_qs8(const qint8_t *addr) +{ + return vld1q_s8(addr); +} + +inline qint16x4_t vld1_qs16(const qint16_t *addr) +{ + return vld1_s16(addr); +} + +inline qint16x8_t vld1q_qs16(const qint16_t *addr) +{ + return vld1q_s16(addr); +} + +inline qint8x8_t vld1_dup_qs8(const qint8_t *addr) +{ + return vld1_dup_s8(addr); +} + +inline qint8x16_t vld1q_dup_qs8(const qint8_t *addr) +{ + return vld1q_dup_s8(addr); +} + +inline void 
vst1_qs8(qint8_t *addr, qint8x8_t b) +{ + vst1_s8(addr, b); +} + +inline void vst1q_qs8(qint8_t *addr, qint8x16_t b) +{ + vst1q_s8(addr, b); +} + +inline void vst1_qs16(qint16_t *addr, qint16x4_t b) +{ + vst1_s16(addr, b); +} + +inline void vst1q_qs16(qint16_t *addr, qint16x8_t b) +{ + vst1q_s16(addr, b); +} + +inline qint8x8_t vqmovn_qs16(qint16x8_t a) +{ + return vqmovn_s16(a); +} + +inline qint8x8_t vdup_n_qs8(qint8_t a) +{ + return vdup_n_s8(a); +} + +inline qint8x16_t vdupq_n_qs8(qint8_t a) +{ + return vdupq_n_s8(a); +} + +inline qint8x16_t vdupq_n_qs8_f32(float a, int fixed_point_position) +{ + float32x4x4_t res = + { + { + vdupq_n_f32(a), + vdupq_n_f32(a), + vdupq_n_f32(a), + vdupq_n_f32(a), + } + }; + return vcvtq_qs8_f32(res, fixed_point_position); +} + +inline qint16x8_t vdupq_n_qs16(qint16_t a) +{ + return vdupq_n_s16(a); +} + +inline qint8x8_t vabs_qs8(qint8x8_t a) +{ + return vabs_s8(a); +} + +inline qint8x16_t vabsq_qs8(qint8x16_t a) +{ + return vabsq_s8(a); +} + +inline qint8x8_t vqabs_qs8(qint8x8_t a) +{ + return vqabs_s8(a); +} + +inline qint8x16_t vqabsq_qs8(qint8x16_t a) +{ + return vqabsq_s8(a); +} + +inline qint8x8_t vmax_qs8(qint8x8_t a, qint8x8_t b) +{ + return vmax_s8(a, b); +} + +inline qint8x16_t vmaxq_qs8(qint8x16_t a, qint8x16_t b) +{ + return vmaxq_s8(a, b); +} + +inline qint8x8_t vpmax_qs8(qint8x8_t a, qint8x8_t b) +{ + return vpmax_s8(a, b); +} + +inline qint8x8_t vmin_qs8(qint8x8_t a, qint8x8_t b) +{ + return vmin_s8(a, b); +} + +inline qint8x16_t vminq_qs8(qint8x16_t a, qint8x16_t b) +{ + return vminq_s8(a, b); +} + +inline qint8x8_t vpmin_qs8(qint8x8_t a, qint8x8_t b) +{ + return vpmin_s8(a, b); +} + +inline qint8x8_t vadd_qs8(qint8x8_t a, qint8x8_t b) +{ + return vadd_s8(a, b); +} + +inline qint8x16_t vaddq_qs8(qint8x16_t a, qint8x16_t b) +{ + return vaddq_s8(a, b); +} + +inline qint8x8_t vqadd_qs8(qint8x8_t a, qint8x8_t b) +{ + return vqadd_s8(a, b); +} + +inline qint8x16_t vqaddq_qs8(qint8x16_t a, qint8x16_t b) +{ + return 
vqaddq_s8(a, b); +} + +inline qint16x4_t vqadd_qs16(qint16x4_t a, qint16x4_t b) +{ + return vqadd_s16(a, b); +} + +inline qint16x8_t vqaddq_qs16(qint16x8_t a, qint16x8_t b) +{ + return vqaddq_s16(a, b); +} + +inline int16x4_t vpaddl_qs8(qint8x8_t a) +{ + return vpaddl_s8(a); +} + +inline qint8x8_t vsub_qs8(qint8x8_t a, qint8x8_t b) +{ + return vsub_s8(a, b); +} + +inline qint8x16_t vsubq_qs8(qint8x16_t a, qint8x16_t b) +{ + return vsubq_s8(a, b); +} + +inline qint8x8_t vqsub_qs8(qint8x8_t a, qint8x8_t b) +{ + return vqsub_s8(a, b); +} + +inline qint8x16_t vqsubq_qs8(qint8x16_t a, qint8x16_t b) +{ + return vqsubq_s8(a, b); +} + +inline qint8x8_t vmul_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position) +{ + const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position); + + // Initialize the temporary result with a constant used to round up the result + qint16x8_t res = vdupq_n_s16(1 << (fixed_point_position - 1)); + + // Vector multiply-accumulate long + res = vmlal_s8(res, a, b); + + // Shift right by fixed_point_position + res = vshlq_s16(res, fixed_point_position_s16); + + // Convert back to qint8 + return vmovn_s16(res); +} + +inline qint8x16_t vmulq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position) +{ + const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position); + + // Initialize the temporary results with a constant used to round up the result + qint16x8_t res0 = vdupq_n_s16(1 << (fixed_point_position - 1)); + qint16x8_t res1 = res0; + + // Vector multiply-accumulate long + res0 = vmlal_s8(res0, vget_low_s8(a), vget_low_s8(b)); + res1 = vmlal_s8(res1, vget_high_s8(a), vget_high_s8(b)); + + // Shift right by fixed_point_position + res0 = vshlq_s16(res0, fixed_point_position_s16); + res1 = vshlq_s16(res1, fixed_point_position_s16); + + // Convert back to qint8 + return vcombine_s8(vmovn_s16(res0), vmovn_s16(res1)); +} + +inline qint8x8_t vqmul_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position) +{ + const 
int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position); + + // Initialize the temporary result with a constant used to round up the result + qint16x8_t res = vdupq_n_s16(1 << (fixed_point_position - 1)); + + // Vector multiply-accumulate long + res = vmlal_s8(res, a, b); + + // Shift right by fixed_point_position + res = vqshlq_s16(res, fixed_point_position_s16); + + // Convert back to qint8 and saturate + return vqmovn_s16(res); +} + +inline qint8x16_t vqmulq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position) +{ + const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position); + + // Initialize the temporary results with a constant used to round up the result + qint16x8_t res0 = vdupq_n_s16(1 << (fixed_point_position - 1)); + qint16x8_t res1 = res0; + + // Vector multiply-accumulate long + res0 = vmlal_s8(res0, vget_low_s8(a), vget_low_s8(b)); + res1 = vmlal_s8(res1, vget_high_s8(a), vget_high_s8(b)); + + // Shift right by fixed_point_position + res0 = vqshlq_s16(res0, fixed_point_position_s16); + res1 = vqshlq_s16(res1, fixed_point_position_s16); + + // Convert back to qint8 and saturate + return vcombine_s8(vqmovn_s16(res0), vqmovn_s16(res1)); +} + +inline qint16x8_t vmull_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position) +{ + const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position); + + qint16x8_t res = vmull_s8(a, b); + + return vqrshlq_s16(res, fixed_point_position_s16); +} + +inline qint8x8_t vmla_qs8(qint8x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position) +{ + const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position); + + // Initialize the temporary results with a constant used to round up the result + qint16x8_t tmp = vdupq_n_s16(1 << (fixed_point_position - 1)); + + // Vector multiply-accumulate long + tmp = vmlal_s8(tmp, b, c); + + // Shift right by fixed_point_position + tmp = vshlq_s16(tmp, fixed_point_position_s16); + + // Convert back to qint8 and 
accumulate + return vadd_s8(a, vmovn_s16(tmp)); +} + +inline qint8x16_t vmlaq_qs8(qint8x16_t a, qint8x16_t b, qint8x16_t c, int fixed_point_position) +{ + const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position); + + // Initialize the temporary results with a constant used to round up the result + qint16x8_t tmp0 = vdupq_n_s16(1 << (fixed_point_position - 1)); + qint16x8_t tmp1 = tmp0; + + // Vector multiply-accumulate long + tmp0 = vmlal_s8(tmp0, vget_low_s8(b), vget_low_s8(c)); + tmp1 = vmlal_s8(tmp1, vget_high_s8(b), vget_high_s8(c)); + + // Shift right by fixed_point_position + tmp0 = vshlq_s16(tmp0, fixed_point_position_s16); + tmp1 = vshlq_s16(tmp1, fixed_point_position_s16); + + // Convert back to qint8 and accumulate + return vcombine_s8(vadd_s8(vget_low_s8(a), vmovn_s16(tmp0)), vadd_s8(vget_high_s8(a), vmovn_s16(tmp1))); +} + +inline qint8x8_t vqmla_qs8(qint8x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position) +{ + const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position); + + // Initialize the temporary results with a constant used to round up the result + qint16x8_t tmp = vdupq_n_s16(1 << (fixed_point_position - 1)); + + // Vector multiply-accumulate long + tmp = vmlal_s8(tmp, b, c); + + // Shift right by fixed_point_position + tmp = vqshlq_s16(tmp, fixed_point_position_s16); + + // Convert back to qint8 and accumulate + return vqadd_s8(a, vqmovn_s16(tmp)); +} + +inline qint8x16_t vqmlaq_qs8(qint8x16_t a, qint8x16_t b, qint8x16_t c, int fixed_point_position) +{ + const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position); + + // Initialize the temporary results with a constant used to round up the result + qint16x8_t tmp0 = vdupq_n_s16(1 << (fixed_point_position - 1)); + qint16x8_t tmp1 = tmp0; + + // Vector multiply-accumulate long + tmp0 = vmlal_s8(tmp0, vget_low_s8(b), vget_low_s8(c)); + tmp1 = vmlal_s8(tmp1, vget_high_s8(b), vget_high_s8(c)); + + // Shift right by fixed_point_position 
+ tmp0 = vqshlq_s16(tmp0, fixed_point_position_s16); + tmp1 = vqshlq_s16(tmp1, fixed_point_position_s16); + + // Convert back to qint8 and accumulate + qint8x16_t res = vcombine_s8(vqmovn_s16(tmp0), vqmovn_s16(tmp1)); + return vqaddq_s8(a, res); +} + +inline qint16x8_t vmlal_qs8(qint16x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position) +{ + const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position); + + // Initialize the temporary results with a constant used to round up the result + qint16x8_t tmp = vdupq_n_s16(1 << (fixed_point_position - 1)); + + // Vector multiply-accumulate long + tmp = vmlal_s8(tmp, b, c); + + // Shift right by fixed_point_position + tmp = vshlq_s16(tmp, fixed_point_position_s16); + + // Accumulate + return vaddq_s16(a, tmp); +} + +inline qint16x8_t vqmlal_qs8(qint16x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position) +{ + const int16x8_t fixed_point_position_s16 = vdupq_n_s16(-fixed_point_position); + + // Initialize the temporary results with a constant used to round up the result + qint16x8_t tmp = vdupq_n_s16(1 << (fixed_point_position - 1)); + + // Vector multiply-accumulate long + tmp = vmlal_s8(tmp, b, c); + + // Shift right by fixed_point_position + tmp = vqshlq_s16(tmp, fixed_point_position_s16); + + // Accumulate + return vqaddq_s16(a, tmp); +} + +inline qint8x8_t vcvt_qs8_f32(const float32x4x2_t &a, int fixed_point_position) +{ + const float32x4_t pow2 = vdupq_n_f32(static_cast(1 << fixed_point_position)); + + float32x4x2_t res_f32 = + { + { + vdupq_n_f32(0.5f), + vdupq_n_f32(0.5f) + } + }; + + res_f32.val[0] = vmlaq_f32(res_f32.val[0], a.val[0], pow2); + res_f32.val[1] = vmlaq_f32(res_f32.val[1], a.val[1], pow2); + + const int32x4x2_t res_s32 = + { + { + vcvtq_s32_f32(res_f32.val[0]), + vcvtq_s32_f32(res_f32.val[1]), + } + }; + + const int16x8_t res_s16 = vcombine_s16(vqmovn_s32(res_s32.val[0]), vqmovn_s32(res_s32.val[1])); + + return vqmovn_s16(res_s16); +} + +inline qint8x16_t 
vcvtq_qs8_f32(const float32x4x4_t &a, int fixed_point_position) +{ + const float32x4_t pow2 = vdupq_n_f32(static_cast(1 << fixed_point_position)); + + float32x4x4_t res_f32 = + { + { + vdupq_n_f32(0.5f), + vdupq_n_f32(0.5f), + vdupq_n_f32(0.5f), + vdupq_n_f32(0.5f) + } + }; + + res_f32.val[0] = vmlaq_f32(res_f32.val[0], a.val[0], pow2); + res_f32.val[1] = vmlaq_f32(res_f32.val[1], a.val[1], pow2); + res_f32.val[2] = vmlaq_f32(res_f32.val[2], a.val[2], pow2); + res_f32.val[3] = vmlaq_f32(res_f32.val[3], a.val[3], pow2); + + const int32x4x4_t res_s32 = + { + { + vcvtq_s32_f32(res_f32.val[0]), + vcvtq_s32_f32(res_f32.val[1]), + vcvtq_s32_f32(res_f32.val[2]), + vcvtq_s32_f32(res_f32.val[3]), + } + }; + + const int16x8x2_t res_s16 = + { + { + vcombine_s16(vqmovn_s32(res_s32.val[0]), vqmovn_s32(res_s32.val[1])), + vcombine_s16(vqmovn_s32(res_s32.val[2]), vqmovn_s32(res_s32.val[3])), + } + }; + + return vcombine_s8(vqmovn_s16(res_s16.val[0]), vqmovn_s16(res_s16.val[1])); +} + +inline float32x4x2_t vcvt_f32_qs8(qint8x8_t a, int fixed_point_position) +{ + const float32x4_t pow2 = vdupq_n_f32(1.0f / (1 << fixed_point_position)); + + const int16x8_t res_s16 = vmovl_s8(a); + + const int32x4x2_t res_s32 = + { + { + vmovl_s16(vget_low_s16(res_s16)), + vmovl_s16(vget_high_s16(res_s16)) + } + }; + + float32x4x2_t res_f32 = + { + { + vcvtq_f32_s32(res_s32.val[0]), + vcvtq_f32_s32(res_s32.val[1]) + } + }; + + res_f32.val[0] = vmulq_f32(res_f32.val[0], pow2); + res_f32.val[1] = vmulq_f32(res_f32.val[1], pow2); + + return res_f32; +} + +inline float32x4x4_t vcvtq_f32_qs8(qint8x16_t a, int fixed_point_position) +{ + const float32x4_t pow2 = vdupq_n_f32(1.0f / (1 << fixed_point_position)); + + const int16x8x2_t res_s16 = + { + { + vmovl_s8(vget_low_s8(a)), + vmovl_s8(vget_high_s8(a)), + } + }; + + const int32x4x4_t res_s32 = + { + { + vmovl_s16(vget_low_s16(res_s16.val[0])), + vmovl_s16(vget_high_s16(res_s16.val[0])), + vmovl_s16(vget_low_s16(res_s16.val[1])), + 
vmovl_s16(vget_high_s16(res_s16.val[1])), + } + }; + + float32x4x4_t res_f32 = + { + { + vcvtq_f32_s32(res_s32.val[0]), + vcvtq_f32_s32(res_s32.val[1]), + vcvtq_f32_s32(res_s32.val[2]), + vcvtq_f32_s32(res_s32.val[3]) + } + }; + + res_f32.val[0] = vmulq_f32(res_f32.val[0], pow2); + res_f32.val[1] = vmulq_f32(res_f32.val[1], pow2); + res_f32.val[2] = vmulq_f32(res_f32.val[2], pow2); + res_f32.val[3] = vmulq_f32(res_f32.val[3], pow2); + + return res_f32; +} + +inline qint8x8_t vrecip_qs8(qint8x8_t a, int fixed_point_position) +{ + // We need two bits to store 2, thus we can only support formats from Q2.5 to Q7.0 + const qint8x8_t const_48_over_17 = vdup_n_s8(0x7A >> (5 - fixed_point_position)); // 2.823 + const qint8x8_t const_minus_32_over_17 = vdup_n_s8(-(0x3C >> (5 - fixed_point_position))); // -1.8823 + const qint8x8_t const_one = vdup_n_s8(1 << fixed_point_position); + + // Find shift value + const qint8x8_t shift_value = vneg_s8(vsub_s8(vdup_n_s8(8), vadd_s8(vclz_s8(a), vdup_n_s8(fixed_point_position)))); + const qint8x8_t temp = vshl_s8(a, shift_value); + + qint8x8_t x = vadd_s8(const_48_over_17, vmul_qs8(temp, const_minus_32_over_17, fixed_point_position)); + + uint8x8_t set_one = vcgt_s8(x, const_one); + x = vbsl_s8(set_one, const_one, x); + + // Use three iterations of Newton-Raphson method to get the result + x = vadd_s8(x, vmul_qs8(x, vsub_s8(const_one, vmul_qs8(temp, x, fixed_point_position)), fixed_point_position)); + x = vadd_s8(x, vmul_qs8(x, vsub_s8(const_one, vmul_qs8(temp, x, fixed_point_position)), fixed_point_position)); + x = vadd_s8(x, vmul_qs8(x, vsub_s8(const_one, vmul_qs8(temp, x, fixed_point_position)), fixed_point_position)); + + return vshl_s8(x, shift_value); +} + +inline qint8x16_t vrecipq_qs8(qint8x16_t a, int fixed_point_position) +{ + // We need two bits to store 2, thus we can only support formats from Q2.5 to Q7.0 + const qint8x16_t const_48_over_17 = vdupq_n_s8(0x7A >> (5 - fixed_point_position)); // 2.823 + const qint8x16_t 
const_minus_32_over_17 = vdupq_n_s8((0x3C >> (5 - fixed_point_position))); // -1.8823 + const qint8x16_t const_one = vdupq_n_s8(1 << fixed_point_position); + + // Find shift value + const qint8x16_t shift_value = vnegq_s8(vsubq_s8(vdupq_n_s8(8), vaddq_s8(vclzq_s8(a), vdupq_n_s8(fixed_point_position)))); + const qint8x16_t temp = vshlq_s8(a, shift_value); + + qint8x16_t x = vsubq_qs8(const_48_over_17, vmulq_qs8(temp, const_minus_32_over_17, fixed_point_position)); + + // Set initial guess to one if x > 1 + uint8x16_t set_one = vcgtq_s8(x, const_one); + x = vbslq_s8(set_one, const_one, x); + + // Use three iterations of Newton-Raphson method to get the result + x = vaddq_s8(x, vmulq_qs8(x, vsubq_s8(const_one, vmulq_qs8(temp, x, fixed_point_position)), fixed_point_position)); + x = vaddq_s8(x, vmulq_qs8(x, vsubq_s8(const_one, vmulq_qs8(temp, x, fixed_point_position)), fixed_point_position)); + x = vaddq_s8(x, vmulq_qs8(x, vsubq_s8(const_one, vmulq_qs8(temp, x, fixed_point_position)), fixed_point_position)); + + return vshlq_s8(x, shift_value); +} + +inline qint8x16_t vqrecipq_qs8(qint8x16_t a, int fixed_point_position) +{ + // We need two bits to store 2, thus we can only support formats from Q2.5 to Q7.0 + const qint8x16_t const_48_over_17 = vdupq_n_s8(0x7A >> (5 - fixed_point_position)); // 2.823 + const qint8x16_t const_minus_32_over_17 = vdupq_n_s8((0x3C >> (5 - fixed_point_position))); // -1.8823 + const qint8x16_t const_one = vdupq_n_s8(1 << fixed_point_position); + + // Find shift value + const qint8x16_t shift_value = vqnegq_s8(vqsubq_s8(vdupq_n_s8(8), vqaddq_s8(vclzq_s8(a), vdupq_n_s8(fixed_point_position)))); + const qint8x16_t temp = vqshlq_s8(a, shift_value); + + qint8x16_t x = vqsubq_qs8(const_48_over_17, vmulq_qs8(temp, const_minus_32_over_17, fixed_point_position)); + + // Set initial guess to one if x > 1 + uint8x16_t set_one = vcgtq_s8(x, const_one); + x = vbslq_s8(set_one, const_one, x); + + // Use three iterations of Newton-Raphson method to get the 
result + x = vqaddq_s8(x, vqmulq_qs8(x, vqsubq_s8(const_one, vqmulq_qs8(temp, x, fixed_point_position)), fixed_point_position)); + x = vqaddq_s8(x, vqmulq_qs8(x, vqsubq_s8(const_one, vqmulq_qs8(temp, x, fixed_point_position)), fixed_point_position)); + x = vqaddq_s8(x, vqmulq_qs8(x, vqsubq_s8(const_one, vqmulq_qs8(temp, x, fixed_point_position)), fixed_point_position)); + + return vqshlq_s8(x, shift_value); +} + +inline qint8x8_t vdiv_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position) +{ + return vmul_qs8(a, vrecip_qs8(b, fixed_point_position), fixed_point_position); +} + +inline qint8x16_t vdivq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position) +{ + return vmulq_qs8(a, vrecipq_qs8(b, fixed_point_position), fixed_point_position); +} + +template +inline qint8x8_t vtaylor_poly_qs8(int8x8_t a, int fixed_point_position) +{ + const qint8x8_t shift_value = vdup_n_s8(-(7 - fixed_point_position)); + const qint8x8_t const_one = vdup_n_s8(1); + const qint8x8_t A = vrshl_s8(islog ? log_tab_qs8[0] : exp_tab_qs8[0], islog ? vadd_s8(shift_value, const_one) : shift_value); + const qint8x8_t B = vrshl_s8(islog ? log_tab_qs8[1] : exp_tab_qs8[1], shift_value); + const qint8x8_t C = vrshl_s8(islog ? log_tab_qs8[2] : exp_tab_qs8[2], shift_value); + const qint8x8_t D = vrshl_s8(islog ? log_tab_qs8[3] : exp_tab_qs8[3], shift_value); + const qint8x8_t x1 = vadd_s8(vmul_qs8(a, D, fixed_point_position), C); + const qint8x8_t x2 = vadd_s8(vmul_qs8(a, x1, fixed_point_position), B); + const qint8x8_t x3 = vadd_s8(vmul_qs8(a, x2, fixed_point_position), A); + const qint8x8_t res = vmul_qs8(a, x3, fixed_point_position); + return res; +} + +template +inline qint8x8_t vqtaylor_poly_qs8(int8x8_t a, int fixed_point_position) +{ + const qint8x8_t shift_value = vdup_n_s8(-(7 - fixed_point_position)); + const qint8x8_t const_one = vdup_n_s8(1); + const qint8x8_t A = vqrshl_s8(islog ? log_tab_qs8[0] : exp_tab_qs8[0], islog ? 
vqadd_s8(shift_value, const_one) : shift_value); + const qint8x8_t B = vqrshl_s8(islog ? log_tab_qs8[1] : exp_tab_qs8[1], shift_value); + const qint8x8_t C = vqrshl_s8(islog ? log_tab_qs8[2] : exp_tab_qs8[2], shift_value); + const qint8x8_t D = vqrshl_s8(islog ? log_tab_qs8[3] : exp_tab_qs8[3], shift_value); + const qint8x8_t x1 = vqadd_s8(vqmul_qs8(a, D, fixed_point_position), C); + const qint8x8_t x2 = vqadd_s8(vqmul_qs8(a, x1, fixed_point_position), B); + const qint8x8_t x3 = vqadd_s8(vqmul_qs8(a, x2, fixed_point_position), A); + const qint8x8_t res = vqmul_qs8(a, x3, fixed_point_position); + return res; +} + +template +inline qint8x16_t vtaylor_polyq_qs8(qint8x16_t a, int fixed_point_position) +{ + const qint8x16_t shift_value = vdupq_n_s8(-(7 - fixed_point_position)); + const qint8x16_t const_one = vdupq_n_s8(1); + const qint8x16_t A = vrshlq_s8(islog ? log_tabq_qs8[0] : exp_tabq_qs8[0], islog ? vaddq_s8(shift_value, const_one) : shift_value); + const qint8x16_t B = vrshlq_s8(islog ? log_tabq_qs8[1] : exp_tabq_qs8[1], shift_value); + const qint8x16_t C = vrshlq_s8(islog ? log_tabq_qs8[2] : exp_tabq_qs8[2], shift_value); + const qint8x16_t D = vrshlq_s8(islog ? log_tabq_qs8[3] : exp_tabq_qs8[3], shift_value); + const qint8x16_t x1 = vaddq_s8(vmulq_qs8(a, D, fixed_point_position), C); + const qint8x16_t x2 = vaddq_s8(vmulq_qs8(a, x1, fixed_point_position), B); + const qint8x16_t x3 = vaddq_s8(vmulq_qs8(a, x2, fixed_point_position), A); + const qint8x16_t res = vmulq_qs8(a, x3, fixed_point_position); + return res; +} + +template +inline qint8x16_t vqtaylor_polyq_qs8(qint8x16_t a, int fixed_point_position) +{ + const qint8x16_t shift_value = vdupq_n_s8(-(7 - fixed_point_position)); + const qint8x16_t const_one = vdupq_n_s8(1); + const qint8x16_t A = vqrshlq_s8(islog ? log_tabq_qs8[0] : exp_tabq_qs8[0], islog ? vqaddq_s8(shift_value, const_one) : shift_value); + const qint8x16_t B = vqrshlq_s8(islog ? 
log_tabq_qs8[1] : exp_tabq_qs8[1], shift_value); + const qint8x16_t C = vqrshlq_s8(islog ? log_tabq_qs8[2] : exp_tabq_qs8[2], shift_value); + const qint8x16_t D = vqrshlq_s8(islog ? log_tabq_qs8[3] : exp_tabq_qs8[3], shift_value); + const qint8x16_t x1 = vqaddq_s8(vqmulq_qs8(a, D, fixed_point_position), C); + const qint8x16_t x2 = vqaddq_s8(vqmulq_qs8(a, x1, fixed_point_position), B); + const qint8x16_t x3 = vqaddq_s8(vqmulq_qs8(a, x2, fixed_point_position), A); + const qint8x16_t res = vqmulq_qs8(a, x3, fixed_point_position); + return res; +} + +inline qint8x8_t vqexp_qs8(qint8x8_t a, int fixed_point_position) +{ + const qint8x8_t shift_value = vdup_n_s8(fixed_point_position - 7); + const qint8x8_t const_one = vdup_n_s8(1 << fixed_point_position); + const qint8x8_t const_ln2 = vqrshl_s8(vdup_n_s8(0x58), shift_value); // ln(2) + const qint8x8_t const_inv_ln2 = vorr_s8(vqrshl_s8(vdup_n_s8(0x38), shift_value), const_one); // 1/ln(2) + + // Perform range reduction [-log(2),log(2)] + const qint8x8_t m = vqmul_qs8(a, const_inv_ln2, fixed_point_position); // x / ln(2) + + // get decimal part from m + const qint8x8_t dec_m = vqshl_s8(m, vdup_n_s8(-fixed_point_position)); + + qint8x8_t alpha = vqmul_qs8(vqshl_s8(dec_m, vdup_n_s8(fixed_point_position)), const_ln2, fixed_point_position); + alpha = vqabs_qs8(vqsub_s8(a, alpha)); + + // Polynomial Approximation + qint8x8_t poly = vqtaylor_poly_qs8(alpha, fixed_point_position); + poly = vqadd_s8(poly, const_one); + + // Reconstruct + poly = vqshl_s8(poly, dec_m); + + return poly; +} + +inline qint8x16_t vqexpq_qs8(qint8x16_t a, int fixed_point_position) +{ + const qint8x16_t shift_value = vdupq_n_s8(fixed_point_position - 7); + const qint8x16_t const_one = vdupq_n_s8(1 << fixed_point_position); + const qint8x16_t const_ln2 = vqrshlq_s8(vdupq_n_s8(0x58), shift_value); // ln(2) + const qint8x16_t const_inv_ln2 = vorrq_s8(vqrshlq_s8(vdupq_n_s8(0x38), shift_value), const_one); // 1/ln(2) + + // Perform range reduction 
[-log(2),log(2)] + const qint8x16_t m = vqmulq_qs8(a, const_inv_ln2, fixed_point_position); // x / ln(2) + + // get decimal part from m + const qint8x16_t dec_m = vqshlq_s8(m, vdupq_n_s8(-fixed_point_position)); + + qint8x16_t alpha = vqmulq_qs8(vqshlq_s8(dec_m, vdupq_n_s8(fixed_point_position)), const_ln2, fixed_point_position); + alpha = vqabsq_qs8(vqsubq_qs8(a, alpha)); + + // Polynomial Approximation + qint8x16_t poly = vqtaylor_polyq_qs8(alpha, fixed_point_position); + poly = vqaddq_s8(poly, const_one); + + // Reconstruct + poly = vqshlq_s8(poly, dec_m); + + return poly; +} + +inline qint8x8_t vlog_qs8(qint8x8_t a, int fixed_point_position) +{ + const qint8x8_t const_one = vdup_n_s8(1 << fixed_point_position); + const qint8x8_t const_seven_dec = vdup_n_s8(7); + const qint8x8_t const_ln2 = vdup_n_s8(0x58 >> (7 - fixed_point_position)); // ln(2) + + // If 0 < a < 1, calculate log(1/x) + uint8x8_t calc_reciprocal = vclt_s8(a, const_one); + qint8x8_t recip = vdup_n_s8(0); + recip = vbsl_s8(calc_reciprocal, recip, a); + + // Calculate reciprocal + recip = vrecip_qs8(recip, fixed_point_position); + a = vbsl_s8(calc_reciprocal, recip, a); + + // Get decimal part of a + qint8x8_t shift_value = vdup_n_s8(-fixed_point_position); + qint8x8_t dec_a = vshl_s8(a, shift_value); // a >> fixed_point_position + + // Get exponent of 2^n which is equal or less than dec_a + shift_value = vsub_s8(const_seven_dec, vclz_s8(dec_a)); + + // Get x to range (1, 2] + const qint8x8_t shift_value_neg = vneg_s8(shift_value); + const qint8x8_t temp = vsub_s8(vrshl_s8(a, shift_value_neg), const_one); + const qint8x8_t sum = vmul_s8(shift_value, const_one); + + // Polynomial Approximation + qint8x8_t poly = vtaylor_poly_qs8(temp, fixed_point_position); + + // Reconstruct + poly = vmul_qs8(vadd_s8(poly, sum), const_ln2, fixed_point_position); + + // Set negative value for 0 < a < 1 + poly = vbsl_s8(calc_reciprocal, vneg_s8(poly), poly); + + return poly; +} + +inline qint8x16_t 
vlogq_qs8(qint8x16_t a, int fixed_point_position) +{ + const qint8x16_t const_one = vdupq_n_s8(1 << fixed_point_position); + const qint8x16_t const_seven_dec = vdupq_n_s8(7); + const qint8x16_t const_ln2 = vdupq_n_s8(0x58 >> (7 - fixed_point_position)); // ln(2) + + // If 0 < a < 1, calculate log(1/x) + uint8x16_t calc_reciprocal = vcltq_s8(a, const_one); + qint8x16_t recip = vdupq_n_s8(0); + recip = vbslq_s8(calc_reciprocal, a, recip); + + // Calculate reciprocal + recip = vrecipq_qs8(recip, fixed_point_position); + a = vbslq_s8(calc_reciprocal, recip, a); + + // Get decimal part of a + qint8x16_t shift_value = vdupq_n_s8(-fixed_point_position); + qint8x16_t dec_a = vshlq_s8(a, shift_value); // a >> fixed_point_position + + // Get exponent of 2^n which is equal or less than dec_a + shift_value = vsubq_s8(const_seven_dec, vclzq_s8(dec_a)); + + // Get x to range (1, 2] + const qint8x16_t shift_value_neg = vnegq_s8(shift_value); + const qint8x16_t temp = vsubq_s8(vrshlq_s8(a, shift_value_neg), const_one); + const qint8x16_t sum = vmulq_s8(shift_value, const_one); + + // Polynomial Approximation + qint8x16_t poly = vtaylor_polyq_qs8(temp, fixed_point_position); + + // Reconstruct + poly = vmulq_qs8(vaddq_s8(poly, sum), const_ln2, fixed_point_position); + + // Set negative value for 0 < a < 1 + poly = vbslq_s8(calc_reciprocal, vnegq_s8(poly), poly); + + return poly; +} + +inline qint8x8_t vinvsqrt_qs8(qint8x8_t a, int fixed_point_position) +{ + const qint8x8_t const_three = vdup_n_s8(3 << fixed_point_position); + + // Find shift value. Number must be in (0.5, 2) range. 
+ qint8x8_t shift_value = vneg_s8(vsub_s8(vdup_n_s8(8), vadd_s8(vclz_s8(a), vdup_n_s8(fixed_point_position)))); + + // Add one when the shift value is negative in order to get the correct result when we shift right with 1 + qint8x8_t temp = vsub_s8(vdup_n_s8(8), vadd_s8(vclz_s8(a), vdup_n_s8(fixed_point_position))); + uint8x8_t temp_ltz = vclt_s8(temp, vdup_n_qs8(0)); + temp = vbsl_s8(temp_ltz, vadd_s8(temp, vdup_n_s8(1)), temp); + qint8x8_t shift_value2 = vneg_s8(vshr_n_s8(temp, 1)); + + temp = vshl_s8(a, shift_value); + + // Initial guess + qint8x8_t x = temp; + + // Calculate (x / 2) * (3 - a * x^2) + // After three iterations we have the result for 8 bit + x = vshr_n_s8(vmul_qs8(x, vsub_s8(const_three, vmul_qs8(temp, vmul_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + x = vshr_n_s8(vmul_qs8(x, vsub_s8(const_three, vmul_qs8(temp, vmul_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + x = vshr_n_s8(vmul_qs8(x, vsub_s8(const_three, vmul_qs8(temp, vmul_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + + return vshl_s8(x, shift_value2); +} + +inline qint8x8_t vqinvsqrt_qs8(qint8x8_t a, int fixed_point_position) +{ + const qint8x8_t const_three = vdup_n_s8(3 << fixed_point_position); + + // Find shift value. Number must be in (0.5, 2) range. 
+ qint8x8_t shift_value = vneg_s8(vqsub_s8(vdup_n_s8(8), vadd_s8(vclz_s8(a), vdup_n_s8(fixed_point_position)))); + + // Add one when the shift value is negative in order to get the correct result when we shift right with 1 + qint8x8_t temp = vsub_s8(vdup_n_s8(8), vadd_s8(vclz_s8(a), vdup_n_s8(fixed_point_position))); + uint8x8_t temp_ltz = vclt_s8(temp, vdup_n_qs8(0)); + temp = vbsl_s8(temp_ltz, vadd_s8(temp, vdup_n_s8(1)), temp); + qint8x8_t shift_value2 = vneg_s8(vshr_n_s8(temp, 1)); + + temp = vshl_s8(a, shift_value); + + // Initial guess + qint8x8_t x = temp; + + // Calculate (x / 2) * (3 - a * x^2) + // After three iterations we have the result for 8 bit + x = vshr_n_s8(vqmul_qs8(x, vqsub_s8(const_three, vqmul_qs8(temp, vqmul_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + x = vshr_n_s8(vqmul_qs8(x, vqsub_s8(const_three, vqmul_qs8(temp, vqmul_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + x = vshr_n_s8(vqmul_qs8(x, vqsub_s8(const_three, vqmul_qs8(temp, vqmul_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + + return vshl_s8(x, shift_value2); +} + +inline qint8x16_t vinvsqrtq_qs8(qint8x16_t a, int fixed_point_position) +{ + const qint8x16_t const_three = vdupq_n_s8(3 << fixed_point_position); + + // Find shift value. Number must be in (0.5, 2) range. 
+ qint8x16_t shift_value = vnegq_s8(vsubq_s8(vdupq_n_s8(8), vaddq_s8(vclzq_s8(a), vdupq_n_s8(fixed_point_position)))); + + // Add one when the shift value is negative in order to get the correct result when we shift right with 1 + qint8x16_t temp = vsubq_s8(vdupq_n_s8(8), vaddq_s8(vclzq_s8(a), vdupq_n_s8(fixed_point_position))); + uint8x16_t temp_ltz = vcltq_s8(temp, vdupq_n_qs8(0)); + temp = vbslq_s8(temp_ltz, vaddq_s8(temp, vdupq_n_s8(1)), temp); + qint8x16_t shift_value2 = vnegq_s8(vshrq_n_s8(temp, 1)); + + temp = vshlq_s8(a, shift_value); + + // Initial guess + qint8x16_t x = temp; + + // Calculate (x / 2) * (3 - a * x^2) + // After three iterations we have the result for 8 bit + x = vshrq_n_s8(vmulq_qs8(x, vsubq_s8(const_three, vmulq_qs8(temp, vmulq_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + x = vshrq_n_s8(vmulq_qs8(x, vsubq_s8(const_three, vmulq_qs8(temp, vmulq_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + x = vshrq_n_s8(vmulq_qs8(x, vsubq_s8(const_three, vmulq_qs8(temp, vmulq_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + + return vshlq_s8(x, shift_value2); +} + +inline qint8x16_t vqinvsqrtq_qs8(qint8x16_t a, int fixed_point_position) +{ + const qint8x16_t const_three = vdupq_n_s8(3 << fixed_point_position); + + // Find shift value. Number must be in (0.5, 2) range. 
+ qint8x16_t shift_value = vnegq_s8(vqsubq_s8(vdupq_n_s8(8), vaddq_s8(vclzq_s8(a), vdupq_n_s8(fixed_point_position)))); + + // Add one when the shift value is negative in order to get the correct result when we shift right with 1 + qint8x16_t temp = vsubq_s8(vdupq_n_s8(8), vaddq_s8(vclzq_s8(a), vdupq_n_s8(fixed_point_position))); + uint8x16_t temp_ltz = vcltq_s8(temp, vdupq_n_qs8(0)); + temp = vbslq_s8(temp_ltz, vaddq_s8(temp, vdupq_n_s8(1)), temp); + qint8x16_t shift_value2 = vnegq_s8(vshrq_n_s8(temp, 1)); + + temp = vshlq_s8(a, shift_value); + + // Initial guess + qint8x16_t x = temp; + + // Calculate (x / 2) * (3 - a * x^2) + // After three iterations we have the result for 8 bit + x = vshrq_n_s8(vqmulq_qs8(x, vqsubq_s8(const_three, vqmulq_qs8(temp, vqmulq_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + x = vshrq_n_s8(vqmulq_qs8(x, vqsubq_s8(const_three, vqmulq_qs8(temp, vqmulq_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + x = vshrq_n_s8(vqmulq_qs8(x, vqsubq_s8(const_three, vqmulq_qs8(temp, vqmulq_qs8(x, x, fixed_point_position), fixed_point_position)), fixed_point_position), 1); + + return vshlq_s8(x, shift_value2); +} + +inline qint8x8_t vtanh_qs8(qint8x8_t a, int fixed_point_position) +{ + const qint8x8_t const_one = vdup_n_s8(1 << fixed_point_position); + const qint8x8_t const_two = vdup_n_s8(2 << fixed_point_position); + + qint8x8_t exp2x = vqexp_qs8(vqmul_qs8(const_two, a, fixed_point_position), fixed_point_position); + qint8x8_t num = vqsub_qs8(exp2x, const_one); + qint8x8_t den = vqadd_qs8(exp2x, const_one); + qint8x8_t tanh = vqmul_qs8(num, vrecip_qs8(den, fixed_point_position), fixed_point_position); + + return tanh; +} + +inline qint8x16_t vtanhq_qs8(qint8x16_t a, int fixed_point_position) +{ + const qint8x16_t const_one = vdupq_n_s8(1 << fixed_point_position); + const qint8x16_t const_two = vdupq_n_s8(2 << fixed_point_position); + + qint8x16_t exp2x = 
vqexpq_qs8(vqmulq_qs8(const_two, a, fixed_point_position), fixed_point_position); + qint8x16_t num = vqsubq_qs8(exp2x, const_one); + qint8x16_t den = vqaddq_qs8(exp2x, const_one); + qint8x16_t tanh = vqmulq_qs8(num, vqrecipq_qs8(den, fixed_point_position), fixed_point_position); + + return tanh; +} + +inline qint8x16_t vqpowq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position) +{ + return vqexpq_qs8(vqmulq_qs8(b, vlogq_qs8(a, fixed_point_position), fixed_point_position), fixed_point_position); +} +} diff --git a/arm_compute/core/NEON/NEKernels.h b/arm_compute/core/NEON/NEKernels.h new file mode 100644 index 0000000000..eaa50f123b --- /dev/null +++ b/arm_compute/core/NEON/NEKernels.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEKERNELS_H__ +#define __ARM_COMPUTE_NEKERNELS_H__ + +/* Header regrouping all the NEON kernels */ +#include "arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h" +#include "arm_compute/core/NEON/kernels/NEAccumulateKernel.h" +#include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h" +#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h" +#include "arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h" +#include "arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h" +#include "arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h" +#include "arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h" +#include "arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h" +#include "arm_compute/core/NEON/kernels/NEBitwiseXorKernel.h" +#include "arm_compute/core/NEON/kernels/NEBox3x3Kernel.h" +#include "arm_compute/core/NEON/kernels/NECannyEdgeKernel.h" +#include "arm_compute/core/NEON/kernels/NEChannelCombineKernel.h" +#include "arm_compute/core/NEON/kernels/NEChannelExtractKernel.h" +#include "arm_compute/core/NEON/kernels/NECol2ImKernel.h" +#include "arm_compute/core/NEON/kernels/NEColorConvertKernel.h" +#include "arm_compute/core/NEON/kernels/NEConvolutionKernel.h" +#include "arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h" +#include "arm_compute/core/NEON/kernels/NEDepthConcatenateKernel.h" +#include "arm_compute/core/NEON/kernels/NEDepthConvertKernel.h" +#include "arm_compute/core/NEON/kernels/NEDerivativeKernel.h" +#include "arm_compute/core/NEON/kernels/NEDilateKernel.h" +#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h" +#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h" +#include "arm_compute/core/NEON/kernels/NEErodeKernel.h" +#include "arm_compute/core/NEON/kernels/NEFastCornersKernel.h" +#include "arm_compute/core/NEON/kernels/NEFillArrayKernel.h" +#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include 
"arm_compute/core/NEON/kernels/NEFillInnerBorderKernel.h" +#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" +#include "arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h" +#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h" +#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h" +#include "arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" +#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" +#include "arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h" +#include "arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h" +#include "arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h" +#include "arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h" +#include "arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h" +#include "arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h" +#include "arm_compute/core/NEON/kernels/NEHistogramKernel.h" +#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h" +#include "arm_compute/core/NEON/kernels/NEIntegralImageKernel.h" +#include "arm_compute/core/NEON/kernels/NELKTrackerKernel.h" +#include "arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h" +#include "arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h" +#include "arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h" +#include "arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h" +#include "arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h" +#include "arm_compute/core/NEON/kernels/NENonLinearFilterKernel.h" +#include "arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h" +#include "arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h" +#include "arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h" +#include "arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h" +#include "arm_compute/core/NEON/kernels/NERemapKernel.h" +#include "arm_compute/core/NEON/kernels/NEScaleKernel.h" +#include 
"arm_compute/core/NEON/kernels/NEScharr3x3Kernel.h" +#include "arm_compute/core/NEON/kernels/NESobel3x3Kernel.h" +#include "arm_compute/core/NEON/kernels/NESobel5x5Kernel.h" +#include "arm_compute/core/NEON/kernels/NESobel7x7Kernel.h" +#include "arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h" +#include "arm_compute/core/NEON/kernels/NETableLookupKernel.h" +#include "arm_compute/core/NEON/kernels/NEThresholdKernel.h" +#include "arm_compute/core/NEON/kernels/NETransposeKernel.h" +#include "arm_compute/core/NEON/kernels/NEWarpKernel.h" +#include "arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h" + +#endif /* __ARM_COMPUTE_NEKERNELS_H__ */ diff --git a/arm_compute/core/NEON/NEMath.h b/arm_compute/core/NEON/NEMath.h new file mode 100644 index 0000000000..bb8a330c1e --- /dev/null +++ b/arm_compute/core/NEON/NEMath.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEMATH_H__ +#define __ARM_COMPUTE_NEMATH_H__ + +#include + +namespace arm_compute +{ +/** Calculate inverse square root. + * + * @param[in] x Input value. + * + * @return The calculated inverse square root. + */ +float32x4_t vinvsqrtq_f32(float32x4_t x); + +/** Calculate reciprocal. + * + * @param[in] x Input value. + * + * @return The calculated reciprocal. + */ +float32x4_t vinvq_f32(float32x4_t x); + +/** Perform a 7th degree polynomial approximation using Estrin's method. + * + * @param[in] x Input vector value in F32 format. + * @param[in] coeffs Polynomial coefficients table. + * + * @return The calculated approximation. + */ +float32x4_t vtaylor_polyq_f32(float32x4_t x, const std::array &coeffs); + +/** Calculate exponential + * + * @param[in] x Input vector value in F32 format. + * + * @return The calculated exponent. + */ +float32x4_t vexpq_f32(float32x4_t x); + +/** Calculate logarithm + * + * @param[in] x Input vector value in F32 format. + * + * @return The calculated logarithm. + */ +float32x4_t vlogq_f32(float32x4_t x); + +/** Calculate hyperbolic tangent. + * + * tanh(x) = (e^2x - 1)/(e^2x + 1) + * + * @note We clamp x to [-5,5] to avoid overflowing issues. + * + * @param[in] val Input vector value in F32 format. + * + * @return The calculated Hyperbolic Tangent. + */ +float32x4_t vtanhq_f32(float32x4_t val); + +/** Calculate n power of a number. + * + * pow(x,n) = e^(n*log(x)) + * + * @param[in] val Input vector value in F32 format. + * @param[in] n Powers to raise the input to. + * + * @return The calculated power. 
+ */ +float32x4_t vpowq_f32(float32x4_t val, float32x4_t n); +} +#include "arm_compute/core/NEON/NEMath.inl" +#endif /* __ARM_COMPUTE_NEMATH_H__ */ diff --git a/arm_compute/core/NEON/NEMath.inl b/arm_compute/core/NEON/NEMath.inl new file mode 100644 index 0000000000..a31a4c0dc5 --- /dev/null +++ b/arm_compute/core/NEON/NEMath.inl @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +namespace arm_compute +{ +/* Exponent polynomial coefficients */ +const std::array<float32x4_t, 8> exp_tab = +{ + { + vdupq_n_f32(1.f), + vdupq_n_f32(0.0416598916054f), + vdupq_n_f32(0.500000596046f), + vdupq_n_f32(0.0014122662833f), + vdupq_n_f32(1.00000011921f), + vdupq_n_f32(0.00833693705499f), + vdupq_n_f32(0.166665703058f), + vdupq_n_f32(0.000195780929062f), + } +}; + +/* Logarithm polynomial coefficients */ +const std::array<float32x4_t, 8> log_tab = +{ + { + vdupq_n_f32(-2.29561495781f), + vdupq_n_f32(-2.47071170807f), + vdupq_n_f32(-5.68692588806f), + vdupq_n_f32(-0.165253549814f), + vdupq_n_f32(5.17591238022f), + vdupq_n_f32(0.844007015228f), + vdupq_n_f32(4.58445882797f), + vdupq_n_f32(0.0141278216615f), + } +}; + +inline float32x4_t vinvsqrtq_f32(float32x4_t x) +{ + float32x4_t sqrt_reciprocal = vrsqrteq_f32(x); + sqrt_reciprocal = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal); + sqrt_reciprocal = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal); + + return sqrt_reciprocal; +} + +inline float32x4_t vinvq_f32(float32x4_t x) +{ + float32x4_t recip = vrecpeq_f32(x); + recip = vmulq_f32(vrecpsq_f32(x, recip), recip); + recip = vmulq_f32(vrecpsq_f32(x, recip), recip); + return recip; +} + +inline float32x4_t vtaylor_polyq_f32(float32x4_t x, const std::array<float32x4_t, 8> &coeffs) +{ + float32x4_t A = vmlaq_f32(coeffs[0], coeffs[4], x); + float32x4_t B = vmlaq_f32(coeffs[2], coeffs[6], x); + float32x4_t C = vmlaq_f32(coeffs[1], coeffs[5], x); + float32x4_t D = vmlaq_f32(coeffs[3], coeffs[7], x); + float32x4_t x2 = vmulq_f32(x, x); + float32x4_t x4 = vmulq_f32(x2, x2); + float32x4_t res = vmlaq_f32(vmlaq_f32(A, B, x2), vmlaq_f32(C, D, x2), x4); + return res; +} + +inline float32x4_t vexpq_f32(float32x4_t x) +{ + static const float32x4_t CONST_LN2 = vdupq_n_f32(0.6931471805f); // ln(2) + static const float32x4_t CONST_INV_LN2 = vdupq_n_f32(1.4426950408f); // 1/ln(2) + + // Perform range reduction [-log(2),log(2)] + 
int32x4_t m = vcvtq_s32_f32(vmulq_f32(x, CONST_INV_LN2)); + float32x4_t val = vmlsq_f32(x, vcvtq_f32_s32(m), CONST_LN2); + + // Polynomial Approximation + float32x4_t poly = vtaylor_polyq_f32(val, exp_tab); + + // Reconstruct + poly = vreinterpretq_f32_s32(vaddq_s32(vreinterpretq_s32_f32(poly), vshlq_n_s32(m, 23))); + + return poly; +} + +inline float32x4_t vlogq_f32(float32x4_t x) +{ + static const int32x4_t CONST_127 = vdupq_n_s32(127); // 127 + static const float32x4_t CONST_LN2 = vdupq_n_f32(0.6931471805f); // ln(2) + + // Extract exponent + int32x4_t m = vsubq_s32(vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_f32(x), 23)), CONST_127); + float32x4_t val = vreinterpretq_f32_s32(vsubq_s32(vreinterpretq_s32_f32(x), vshlq_n_s32(m, 23))); + + // Polynomial Approximation + float32x4_t poly = vtaylor_polyq_f32(val, log_tab); + + // Reconstruct + poly = vmlaq_f32(poly, vcvtq_f32_s32(m), CONST_LN2); + + return poly; +} + +inline float32x4_t vtanhq_f32(float32x4_t val) +{ + static const float32x4_t CONST_1 = vdupq_n_f32(1.f); + static const float32x4_t CONST_2 = vdupq_n_f32(2.f); + static const float32x4_t CONST_MIN_TANH = vdupq_n_f32(-10.f); + static const float32x4_t CONST_MAX_TANH = vdupq_n_f32(10.f); + + float32x4_t x = vminq_f32(vmaxq_f32(val, CONST_MIN_TANH), CONST_MAX_TANH); + float32x4_t exp2x = vexpq_f32(vmulq_f32(CONST_2, x)); + float32x4_t num = vsubq_f32(exp2x, CONST_1); + float32x4_t den = vaddq_f32(exp2x, CONST_1); + float32x4_t tanh = vmulq_f32(num, vinvq_f32(den)); + return tanh; +} + +inline float32x4_t vpowq_f32(float32x4_t val, float32x4_t n) +{ + return vexpq_f32(vmulq_f32(n, vlogq_f32(val))); +} +} \ No newline at end of file diff --git a/arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h b/arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h new file mode 100644 index 0000000000..9ef93ce67a --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEABSOLUTEDIFFERENCEKERNEL_H__ +#define __ARM_COMPUTE_NEABSOLUTEDIFFERENCEKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the absolute difference kernel + * + * Absolute difference is computed by: + * @f[ output(x,y) = | input1(x,y) - input2(x,y) | @f] + */ +class NEAbsoluteDifferenceKernel : public INEKernel +{ +public: + /** Default constructor */ + NEAbsoluteDifferenceKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAbsoluteDifferenceKernel(const NEAbsoluteDifferenceKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAbsoluteDifferenceKernel &operator=(const NEAbsoluteDifferenceKernel &) = delete; + /** Allow instances of this class to be moved */ + NEAbsoluteDifferenceKernel(NEAbsoluteDifferenceKernel &&) = default; + /** Allow instances of this class to be moved */ + NEAbsoluteDifferenceKernel &operator=(NEAbsoluteDifferenceKernel &&) = default; + /** Default destructor */ + ~NEAbsoluteDifferenceKernel() = default; + + /** Set the inputs and output tensors + * + * @param[in] input1 Source tensor. Data types supported: U8/S16 + * @param[in] input2 Source tensor. Data types supported: U8/S16 + * @param[out] output Destination tensor, Data types supported: U8/S16 + */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + /** Common signature for all the specialised absolute difference functions + * + * @param[in] input1 An input tensor. Data types supported: U8/S16. + * @param[in] input2 An input tensor. Data types supported: U8/S16. + * @param[out] output The output tensor, Data types supported: U8 (Only if both inputs are U8), S16. + * @param[in] window Region on which to execute the kernel. 
+ */ + using AbsDiffFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window); + + /** Absolute difference function to use for the particular tensor formats passed to configure() */ + AbsDiffFunction *_func; + const ITensor *_input1; + const ITensor *_input2; + ITensor *_output; +}; +} +#endif /* __ARM_COMPUTE_NEABSOLUTEDIFFERENCEKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEAccumulateKernel.h b/arm_compute/core/NEON/kernels/NEAccumulateKernel.h new file mode 100644 index 0000000000..df6d7b8891 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEAccumulateKernel.h @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEACCUMULATEKERNEL_H__ +#define __ARM_COMPUTE_NEACCUMULATEKERNEL_H__ + +#include "arm_compute/core/NEON/INESimpleKernel.h" + +#include <cstdint> + +namespace arm_compute +{ +class ITensor; + +/** Interface for the accumulate kernel + * + * Accumulation is computed by: + * @f[ accum(x,y) = accum(x,y) + input(x,y) @f] + */ +class NEAccumulateKernel : public INESimpleKernel +{ +public: + /** Set the input and accumulation tensors + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] accum Destination tensor. Data type supported: S16. + */ + void configure(const ITensor *input, ITensor *accum); + + // Inherited methods overridden: + void run(const Window &window) override; +}; + +/** Interface for the accumulate weighted kernel + * + * Weighted accumulation is computed: + * @f[ accum(x,y) = (1 - \alpha)*accum(x,y) + \alpha*input(x,y) @f] + * + * Where @f$ 0 \le \alpha \le 1 @f$ + * Conceptually, the rounding for this is defined as: + * @f[ output(x,y)= uint8( (1 - \alpha) * float32( int32( output(x,y) ) ) + \alpha * float32( int32( input(x,y) ) ) ) @f] +*/ +class NEAccumulateWeightedKernel : public INESimpleKernel +{ +public: + /** Default constructor */ + NEAccumulateWeightedKernel(); + /** Set the input and accumulation tensors, and the scale value + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[in] alpha Scalar value in the range [0.0f, 1.0f] + * @param[in,out] accum Accumulated tensor. Data type supported: U8. 
+ */ + void configure(const ITensor *input, float alpha, ITensor *accum); + + // Inherited methods overridden: + void run(const Window &window) override; + +protected: + float _alpha; +}; + +#ifdef ARM_COMPUTE_ENABLE_FP16 +/** Interface for the accumulate weighted kernel using F16 */ +class NEAccumulateWeightedFP16Kernel : public NEAccumulateWeightedKernel +{ +public: + // Inherited methods overridden: + void run(const Window &window) override; +}; +#else +using NEAccumulateWeightedFP16Kernel = NEAccumulateWeightedKernel; +#endif + +/** Interface for the accumulate squared kernel + * + * The accumulation of squares is computed: + * @f[ accum(x,y) = saturate_{int16} ( (uint16) accum(x,y) + (((uint16)(input(x,y)^2)) >> (shift)) ) @f] + * + * Where @f$ 0 \le shift \le 15 @f$ +*/ +class NEAccumulateSquaredKernel : public INESimpleKernel +{ +public: + /** Default constructor */ + NEAccumulateSquaredKernel(); + /** Set the input and accumulation tensors and the shift value. + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[in] shift Shift value in the range of [0, 15] + * @param[in,out] accum Accumulated tensor. Data type supported: S16. + */ + void configure(const ITensor *input, uint32_t shift, ITensor *accum); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + uint32_t _shift; +}; +} +#endif /*__ARM_COMPUTE_NEACCUMULATEKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h new file mode 100644 index 0000000000..97f92d6a1e --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEACTIVATIONLAYERKERNEL_H__ +#define __ARM_COMPUTE_NEACTIVATIONLAYERKERNEL_H__ + +#include "arm_compute/core/FixedPoint.h" +#include "arm_compute/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the activation layer kernel. 
 */ +class NEActivationLayerKernel : public INESimpleKernel +{ +public: + /** Constructor */ + NEActivationLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEActivationLayerKernel(const NEActivationLayerKernel &) = delete; + /** Default move constructor */ + NEActivationLayerKernel(NEActivationLayerKernel &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEActivationLayerKernel &operator=(const NEActivationLayerKernel &) = delete; + /** Default move assignment operator */ + NEActivationLayerKernel &operator=(NEActivationLayerKernel &&) = default; + /** Set the input and output tensor. + * + * @param[in] input Source tensor. Data types supported: QS8/F32. + * @param[out] output Destination tensor. Data type supported: same as @p input + * @param[in] activation_info Activation layer information. + */ + void configure(const ITensor *input, ITensor *output, ActivationLayerInfo activation_info); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + using ActivationFunction = ActivationLayerInfo::ActivationFunction; + /** Common signature for all the specialised @ref NEActivationLayerKernel functions + * + * @param[in] window Region on which to execute the kernel. + */ + using ActivationFunctionExecutorPtr = void (NEActivationLayerKernel::*)(const Window &window); + /** Function to apply an activation function on a tensor. + * + * @param[in] window Region on which to execute the kernel + */ + template <ActivationLayerInfo::ActivationFunction F, typename T> + typename std::enable_if<std::is_same<T, float>::value, void>::type activation(const Window &window); + /** Function to apply an activation function on a tensor. 
+ * + * @param[in] window Region on which to execute the kernel + */ + template <ActivationLayerInfo::ActivationFunction F, typename T> + typename std::enable_if<std::is_same<T, qint8_t>::value, void>::type activation(const Window &window); + +private: + ActivationFunctionExecutorPtr _func; + ActivationLayerInfo _act_info; +}; +} +#endif /*__ARM_COMPUTE_NEACTIVATIONLAYERKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h b/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h new file mode 100644 index 0000000000..b36ca46e1a --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEARITHMETICADDITIONKERNEL_H__ +#define __ARM_COMPUTE_NEARITHMETICADDITIONKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to perform addition between two tensors */ +class NEArithmeticAdditionKernel : public INEKernel +{ +public: + /** Default constructor */ + NEArithmeticAdditionKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEArithmeticAdditionKernel(const NEArithmeticAdditionKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEArithmeticAdditionKernel &operator=(const NEArithmeticAdditionKernel &) = delete; + /** Allow instances of this class to be moved */ + NEArithmeticAdditionKernel(NEArithmeticAdditionKernel &&) = default; + /** Allow instances of this class to be moved */ + NEArithmeticAdditionKernel &operator=(NEArithmeticAdditionKernel &&) = default; + /** Default destructor */ + ~NEArithmeticAdditionKernel() = default; + + /** Initialise the kernel's input, output and border mode. + * + * @param[in] input1 An input tensor. Data types supported: U8/S16/F32 + * @param[in] input2 An input tensor. Data types supported: U8/S16/F32 (only if @p input1 is F32). + * @param[out] output The output tensor. Data types supported: U8 (Only if both inputs are U8), S16/F32 (only if both inputs are F32). + * @param[in] policy Overflow policy. + */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + /** Common signature for all the specialised add functions + * + * @param[in] input1 An input tensor. Data types supported: U8/S16/F32. + * @param[in] input2 An input tensor. Data types supported: U8/S16/F32 (only if @p input1 is F32). 
+ * @param[out] output The output tensor. Data types supported: U8 (Only if both inputs are U8), S16/F32 (only if both inputs are F32). + * @param[in] window Region on which to execute the kernel. + */ + using AddFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window); + /** Add function to use for the particular tensor types passed to configure() */ + AddFunction *_func; + const ITensor *_input1; + const ITensor *_input2; + ITensor *_output; +}; +} +#endif /*__ARM_COMPUTE_NEARITHMETICADDITIONKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h b/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h new file mode 100644 index 0000000000..0eb9c23686 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEARITHMETICSUBTRACTIONKERNEL_H__ +#define __ARM_COMPUTE_NEARITHMETICSUBTRACTIONKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to perform subtraction between two tensors */ +class NEArithmeticSubtractionKernel : public INEKernel +{ +public: + /** Default constructor */ + NEArithmeticSubtractionKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEArithmeticSubtractionKernel(const NEArithmeticSubtractionKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEArithmeticSubtractionKernel &operator=(const NEArithmeticSubtractionKernel &) = delete; + /** Allow instances of this class to be moved */ + NEArithmeticSubtractionKernel(NEArithmeticSubtractionKernel &&) = default; + /** Allow instances of this class to be moved */ + NEArithmeticSubtractionKernel &operator=(NEArithmeticSubtractionKernel &&) = default; + /** Default destructor */ + ~NEArithmeticSubtractionKernel() = default; + + /** Initialise the kernel's input, output and border mode. + * + * @param[in] input1 An input tensor. Data types supported: U8/S16/F32 + * @param[in] input2 An input tensor. Data types supported: U8/S16/F32 (only if @p input1 is F32). + * @param[out] output The output tensor. Data types supported: U8 (Only if both inputs are U8), S16/F32 (only if both inputs are F32). + * @param[in] policy Overflow policy. 
+ */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + /** Common signature for all the specialised sub functions + * + * @param[in] input1 An input tensor. Data types supported: U8, S16, F32. + * @param[in] input2 An input tensor. Data types supported: U8, S16, F32 (only if @p input1 is F32). + * @param[out] output The output tensor. Data types supported: U8 (Only if both inputs are U8), S16, F32 (only if both inputs are F32) + * @param[in] window Region on which to execute the kernel. + */ + using SubFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window); + /** Sub function to use for the particular tensor types passed to configure() */ + SubFunction *_func; + const ITensor *_input1; + const ITensor *_input2; + ITensor *_output; +}; +} +#endif /* __ARM_COMPUTE_NEARITHMETICSUBTRACTIONKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h new file mode 100644 index 0000000000..29fcbd26a0 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEBATCHNORMALIZATIONLAYERKERNEL_H__ +#define __ARM_COMPUTE_NEBATCHNORMALIZATIONLAYERKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the batch normalization layer kernel. + */ +class NEBatchNormalizationLayerKernel : public INEKernel +{ +public: + /** Default constructor */ + NEBatchNormalizationLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBatchNormalizationLayerKernel(const NEBatchNormalizationLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBatchNormalizationLayerKernel &operator=(const NEBatchNormalizationLayerKernel &) = delete; + /** Default Move Constructor. */ + NEBatchNormalizationLayerKernel(NEBatchNormalizationLayerKernel &&) = default; + /** Default move assignment operator. */ + NEBatchNormalizationLayerKernel &operator=(NEBatchNormalizationLayerKernel &&) = default; + /** Default destructor */ + ~NEBatchNormalizationLayerKernel() = default; + /** Set the input and output tensors. + * + * @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, FM]. + * The rest are optional and used for representing batches. Data types supported: QS8/F32. + * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. 
Data types supported: Same as @p input + * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] gamma Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] beta Beta values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] epsilon Small value to avoid division with zero. + * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input + */ + void configure(const ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma, float epsilon); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + using BatchNormFunction = void(const ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma, float epsilon, const Window &window); + BatchNormFunction *_func; + const ITensor *_input; + ITensor *_output; + const ITensor *_mean; + const ITensor *_var; + const ITensor *_gamma; + const ITensor *_beta; + float _epsilon; +}; +} +#endif /*__ARM_COMPUTE_NEBATCHNORMALIZATIONLAYERKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h b/arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h new file mode 100644 index 0000000000..b931445419 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEBITWISEANDKERNEL_H__ +#define __ARM_COMPUTE_NEBITWISEANDKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to perform bitwise AND between XY-planes of two tensors + * + * Result is computed by: + * @f[ output(x,y) = input1(x,y) \land input2(x,y) @f] + */ +class NEBitwiseAndKernel : public INEKernel +{ +public: + /** Default constructor */ + NEBitwiseAndKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBitwiseAndKernel(const NEBitwiseAndKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBitwiseAndKernel &operator=(const NEBitwiseAndKernel &) = delete; + /** Allow instances of this class to be moved */ + NEBitwiseAndKernel(NEBitwiseAndKernel &&) = default; + /** Allow instances of this class to be moved */ + NEBitwiseAndKernel &operator=(NEBitwiseAndKernel &&) = default; + /** Initialise the kernel's inputs and output + * + * @param[in] input1 An input tensor. Data type supported: U8. + * @param[in] input2 An input tensor. Data type supported: U8 + * @param[out] output Output tensor. Data type supported: U8. + */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const ITensor *_input1; /**< Source tensor 1 */ + const ITensor *_input2; /**< Source tensor 2 */ + ITensor *_output; /**< Destination tensor */ +}; +} +#endif /* __ARM_COMPUTE_NEBITWISEANDKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h b/arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h new file mode 100644 index 0000000000..e34eb0f5ae --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEBITWISENOTKERNEL_H__ +#define __ARM_COMPUTE_NEBITWISENOTKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to perform bitwise NOT operation + * + * Result is computed by: + * @f[ output(x,y) = \lnot input(x,y) @f] + */ +class NEBitwiseNotKernel : public INEKernel +{ +public: + /** Default constructor */ + NEBitwiseNotKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBitwiseNotKernel(const NEBitwiseNotKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBitwiseNotKernel &operator=(const NEBitwiseNotKernel &) = delete; + /** Allow instances of this class to be moved */ + NEBitwiseNotKernel(NEBitwiseNotKernel &&) = default; + /** Allow instances of this class to be moved */ + NEBitwiseNotKernel &operator=(NEBitwiseNotKernel &&) = default; + /** Initialise the kernel's input and output + * + * @param[in] input An input tensor. Data type supported: U8. + * @param[out] output The output tensor. Data type supported: U8. + */ + void configure(const ITensor *input, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const ITensor *_input; /**< Source tensor */ + ITensor *_output; /**< Destination tensor */ +}; +} +#endif /* __ARM_COMPUTE_NEBITWISENOTKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h b/arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h new file mode 100644 index 0000000000..d2bae2660c --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEBITWISEORKERNEL_H__ +#define __ARM_COMPUTE_NEBITWISEORKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to perform bitwise inclusive OR between two tensors + * + * Result is computed by: + * @f[ output(x,y) = input1(x,y) \lor input2(x,y) @f] + */ +class NEBitwiseOrKernel : public INEKernel +{ +public: + /** Default constructor */ + NEBitwiseOrKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBitwiseOrKernel(const NEBitwiseOrKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBitwiseOrKernel &operator=(const NEBitwiseOrKernel &) = delete; + /** Allow instances of this class to be moved */ + NEBitwiseOrKernel(NEBitwiseOrKernel &&) = default; + /** Allow instances of this class to be moved */ + NEBitwiseOrKernel &operator=(NEBitwiseOrKernel &&) = default; + /** Initialise the kernel's inputs and output. + * + * @param[in] input1 An input tensor. Data type supported: U8. + * @param[in] input2 An input tensor. Data type supported: U8 + * @param[out] output Output tensor. Data type supported: U8. + */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const ITensor *_input1; /**< Source tensor 1 */ + const ITensor *_input2; /**< Source tensor 2 */ + ITensor *_output; /**< Destination tensor */ +}; +} +#endif /* __ARM_COMPUTE_NEBITWISEORKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEBitwiseXorKernel.h b/arm_compute/core/NEON/kernels/NEBitwiseXorKernel.h new file mode 100644 index 0000000000..9dea36e7e3 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEBitwiseXorKernel.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEBITWISEXORKERNEL_H__ +#define __ARM_COMPUTE_NEBITWISEXORKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to perform bitwise exclusive OR (XOR) between two tensors + * + * Result is computed by: + * @f[ output(x,y) = input1(x,y) \oplus input2(x,y) @f] + */ +class NEBitwiseXorKernel : public INEKernel +{ +public: + /** Default constructor */ + NEBitwiseXorKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBitwiseXorKernel(const NEBitwiseXorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBitwiseXorKernel &operator=(const NEBitwiseXorKernel &) = delete; + /** Allow instances of this class to be moved */ + NEBitwiseXorKernel(NEBitwiseXorKernel &&) = default; + /** Allow instances of this class to be moved */ + NEBitwiseXorKernel &operator=(NEBitwiseXorKernel &&) = default; + /** Initialise the kernel's input and output. + * + * @param[in] input1 An input tensor. Data type supported: U8. + * @param[in] input2 An input tensor. Data type supported: U8 + * @param[out] output The output tensor. Data type supported: U8. + */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const ITensor *_input1; /**< Source tensor 1 */ + const ITensor *_input2; /**< Source tensor 2 */ + ITensor *_output; /**< Destination tensor */ +}; +} +#endif /* __ARM_COMPUTE_NEBITWISEXORKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEBox3x3Kernel.h b/arm_compute/core/NEON/kernels/NEBox3x3Kernel.h new file mode 100644 index 0000000000..6b7bebbf17 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEBox3x3Kernel.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEBOX3x3KERNEL_H__ +#define __ARM_COMPUTE_NEBOX3x3KERNEL_H__ + +#include "arm_compute/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform a Box 3x3 filter */ +class NEBox3x3Kernel : public INESimpleKernel +{ +public: + /** Set the source, destination and border mode of the kernel + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output Destination tensor. Data type supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */ + void configure(const ITensor *input, ITensor *output, bool border_undefined); + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; +}; + +#ifdef ARM_COMPUTE_ENABLE_FP16 +/** NEON kernel to perform a Box 3x3 filter using F16 simd + */ +class NEBox3x3FP16Kernel : public NEBox3x3Kernel +{ +public: + // Inherited methods overridden: + void run(const Window &window) override; +}; +#else +using NEBox3x3FP16Kernel = NEBox3x3Kernel; +#endif +} +#endif /*__ARM_COMPUTE_NEBOX3x3KERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h b/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h new file mode 100644 index 0000000000..b86085f439 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h @@ -0,0 +1,190 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NECANNYEDGEKERNEL_H__ +#define __ARM_COMPUTE_NECANNYEDGEKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +#include <cstdint> + +namespace arm_compute +{ +class ITensor; + +/** Computes magnitude and quantised phase from inputs gradients. */ +class NEGradientKernel : public INEKernel +{ +public: + /** Default constructor */ + NEGradientKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGradientKernel(const NEGradientKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGradientKernel &operator=(const NEGradientKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGradientKernel(NEGradientKernel &&) = default; + /** Allow instances of this class to be moved */ + NEGradientKernel &operator=(NEGradientKernel &&) = default; + /** Default destructor */ + virtual ~NEGradientKernel() = default; + + /** Initialise the kernel's sources, destinations and border mode. + * + * @note gx, gy and magnitude must all be the same size (either 16 or 32) + * + * @param[in] gx Source tensor - Gx component. Data type supported: S16/S32. + * @param[in] gy Source tensor - Gy component. Data type supported: same as @p gx. + * @param[out] magnitude Destination tensor - Magnitude. Data type supported: U16 (if the data type of @p gx is S16) / U32 (if the data type of @p gx is S32). + * @param[out] phase Destination tensor - Quantized phase. Data type supported: U8. + * @param[in] norm_type Normalization type. If 1, L1-Norm otherwise L2-Norm + */ + virtual void configure(const ITensor *gx, const ITensor *gy, ITensor *magnitude, ITensor *phase, int32_t norm_type); + + // Inherited methods overridden: + void run(const Window &window) override; + +protected: + /** Common signature for all the specialised gradient functions + * + * @param[in] gx_ptr Pointer to the first input tensor. 
+ * @param[in] gy_ptr Pointer to the second input tensor. + * @param[out] magnitude_ptr Pointer to the first output tensor + * @param[out] phase_ptr Pointer to the second output tensor + */ + using GradientFunction = void(const void *__restrict gx_ptr, const void *__restrict gy_ptr, void *__restrict magnitude_ptr, void *__restrict phase_ptr); + + GradientFunction *_func; /**< Gradient function to use for the particular tensor types passed to configure() */ + const ITensor *_gx; /**< Source tensor - Gx component */ + const ITensor *_gy; /**< Source tensor - Gy component */ + ITensor *_magnitude; /**< Destination tensor - Magnitude */ + ITensor *_phase; /**< Destination tensor - Quantized phase */ +}; + +#ifdef ARM_COMPUTE_ENABLE_FP16 +/** NEON kernel to perform Gradient computation + */ +class NEGradientFP16Kernel : public NEGradientKernel +{ +public: + // Inherited methods overriden: + void configure(const ITensor *gx, const ITensor *gy, ITensor *magnitude, ITensor *phase, int32_t norm_type) override; +}; +#else /* ARM_COMPUTE_ENABLE_FP16 */ +using NEGradientFP16Kernel = NEGradientKernel; +#endif /* ARM_COMPUTE_ENABLE_FP16 */ + +/** NEON kernel to perform Non-Maxima suppression for Canny Edge. + * + * @note This kernel is meant to be used alongside CannyEdge and performs a non-maxima suppression using magnitude and phase of input + * to characterize points as possible edges. Thus, at the end, each point will be set to EDGE, NO_EDGE or MAYBE. 
+ * + * @note Hysteresis is computed in @ref NEEdgeTraceKernel + */ +class NEEdgeNonMaxSuppressionKernel : public INEKernel +{ +public: + /** Default constructor */ + NEEdgeNonMaxSuppressionKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEEdgeNonMaxSuppressionKernel(const NEEdgeNonMaxSuppressionKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEEdgeNonMaxSuppressionKernel &operator=(const NEEdgeNonMaxSuppressionKernel &) = delete; + /** Allow instances of this class to be moved */ + NEEdgeNonMaxSuppressionKernel(NEEdgeNonMaxSuppressionKernel &&) = default; + /** Allow instances of this class to be moved */ + NEEdgeNonMaxSuppressionKernel &operator=(NEEdgeNonMaxSuppressionKernel &&) = default; + /** Default destructor */ + ~NEEdgeNonMaxSuppressionKernel() = default; + + /** Initialise the kernel's sources, destination and border mode. + * + * @param[in] magnitude Source tensor - Magnitude. Data type supported: U16/U32. + * @param[in] phase Source tensor - Quantized phase. Data type supported: U8. + * @param[out] output Output tensor. Data type supported: U8. It will be filled with 0 for "no edge", 127 for "maybe", 255 for "edge" + * @param[in] upper_thr Upper threshold used for the hysteresis + * @param[in] lower_thr Lower threshold used for the hysteresis + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ITensor *magnitude, const ITensor *phase, ITensor *output, int32_t upper_thr, int32_t lower_thr, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + /** Common signature for all the specialised non-maxima suppression functions + * + * @param[in] magnitude_ptr Pointer to the first input tensor. + * @param[in] phase_ptr Pointer to the second input tensor. 
+ * @param[out] output_ptr Pointer to the output tensor + * @param[in] stride_mag Stride of the magnitude tensor + * @param[in] upper_thr Upper threshold used for the hysteresis + * @param[in] lower_thr Lower threshold used for the hysteresis + */ + using EdgeNonMaxSupprFunction = void(const void *__restrict magnitude_ptr, const void *__restrict phase_ptr, void *__restrict output_ptr, const uint32_t stride_mag, const int32_t upper_thr, + const int32_t lower_thr); + + EdgeNonMaxSupprFunction *_func; /**< Non-Maxima suppression function to use for the particular tensor types passed to configure() */ + const ITensor *_magnitude; /**< Source tensor - Magnitude */ + const ITensor *_phase; /**< Source tensor - Quantized phase */ + ITensor *_output; /**< Destination tensor */ + int32_t _lower_thr; /**< Lower threshold used for the hysteresis */ + int32_t _upper_thr; /**< Upper threshold used for the hysteresis */ +}; + +/** NEON kernel to perform Edge tracing */ +class NEEdgeTraceKernel : public INEKernel +{ +public: + /** Default constructor */ + NEEdgeTraceKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEEdgeTraceKernel(const NEEdgeTraceKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEEdgeTraceKernel &operator=(const NEEdgeTraceKernel &) = delete; + /** Allow instances of this class to be moved */ + NEEdgeTraceKernel(NEEdgeTraceKernel &&) = default; + /** Allow instances of this class to be moved */ + NEEdgeTraceKernel &operator=(NEEdgeTraceKernel &&) = default; + /** Default constructor */ + ~NEEdgeTraceKernel() = default; + + /** Initialise the kernel's source, destination and border mode. + * + * @param[in,out] input Source tensor. Data type supported: U8. Must contain 0 for "no edge", 127 for "maybe", 255 for "edge" + * @param[in,out] output Destination tensor. Data type supported: U8. Must be initialized to 0 (No edge). 
+ */ + void configure(ITensor *input, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + bool is_parallelisable() const override; + +private: + ITensor *_input; /**< Source tensor */ + ITensor *_output; /**< Destination tensor */ +}; +} +#endif /* __ARM_COMPUTE_NECANNYEDGEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEChannelCombineKernel.h b/arm_compute/core/NEON/kernels/NEChannelCombineKernel.h new file mode 100644 index 0000000000..8b669a4d28 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEChannelCombineKernel.h @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NECHANNELCOMBINEKERNEL_H__ +#define __ARM_COMPUTE_NECHANNELCOMBINEKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +#include <array> +#include <cstdint> + +namespace arm_compute +{ +class IMultiImage; +class ITensor; +using IImage = ITensor; + +/** Interface for the channel combine kernel */ +class NEChannelCombineKernel : public INEKernel +{ +public: + /** Default constructor */ + NEChannelCombineKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEChannelCombineKernel(const NEChannelCombineKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEChannelCombineKernel &operator=(const NEChannelCombineKernel &) = delete; + /** Allow instances of this class to be moved */ + NEChannelCombineKernel(NEChannelCombineKernel &&) = default; + /** Allow instances of this class to be moved */ + NEChannelCombineKernel &operator=(NEChannelCombineKernel &&) = default; + /** Default destructor */ + ~NEChannelCombineKernel() = default; + + /** Configure function's inputs and outputs. + * + * @param[in] plane0 The 2D plane that forms channel 0. Data type supported: U8 + * @param[in] plane1 The 2D plane that forms channel 1. Data type supported: U8 + * @param[in] plane2 The 2D plane that forms channel 2. Data type supported: U8 + * @param[in] plane3 The 2D plane that forms channel 3. Data type supported: U8 + * @param[out] output The single planar output tensor. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422 + */ + void configure(const ITensor *plane0, const ITensor *plane1, const ITensor *plane2, const ITensor *plane3, ITensor *output); + /** Configure function's inputs and outputs. + * + * @param[in] plane0 The 2D plane that forms channel 0. Data type supported: U8 + * @param[in] plane1 The 2D plane that forms channel 1. Data type supported: U8 + * @param[in] plane2 The 2D plane that forms channel 2. 
Data type supported: U8 + * @param[out] output The multi planar output tensor. Formats supported: NV12/NV21/IYUV/YUV444 + */ + void configure(const IImage *plane0, const IImage *plane1, const IImage *plane2, IMultiImage *output); + + // Inherited methods overridden: + void run(const Window &window) override; + bool is_parallelisable() const override; + +private: + /** Combine 3 planes to form a three channel single plane tensor. + * + * @param[in] win Region on which to execute the kernel. + */ + void combine_3C(const Window &win); + /** Combine 4 planes to form a four channel single plane tensor. + * + * @param[in] win Region on which to execute the kernel. + */ + void combine_4C(const Window &win); + /** Combine 3 planes to form a single plane YUV tensor. + * + * @param[in] win Region on which to execute the kernel. + */ + template + void combine_YUV_1p(const Window &win); + /** Combine 3 planes to form a two plane YUV tensor. + * + * @param[in] win Region on which to execute the kernel. + */ + void combine_YUV_2p(const Window &win); + /** Combine 3 planes to form a three plane YUV tensor. + * + * @param[in] win Region on which to execute the kernel. + */ + void combine_YUV_3p(const Window &win); + /** Copies a full plane to the output tensor. + * + * @param[in] win Region on which to execute the kernel. + */ + void copy_plane(const Window &win, uint32_t plane_id); + /** Common signature for all the specialised ChannelCombine functions + * + * @param[in] window Region on which to execute the kernel. 
+ */ + using ChannelCombineFunction = void (NEChannelCombineKernel::*)(const Window &window); + /** ChannelCombine function to use for the particular tensor types passed to configure() */ + ChannelCombineFunction _func; + std::array<const ITensor *, 4> _planes; + ITensor *_output; + IMultiImage *_output_multi; + std::array<uint32_t, 3> _x_subsampling; + std::array<uint32_t, 3> _y_subsampling; + unsigned int _num_elems_processed_per_iteration; + bool _is_parallelizable; +}; +} +#endif /* __ARM_COMPUTE_NECHANNELCOMBINEKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEChannelExtractKernel.h b/arm_compute/core/NEON/kernels/NEChannelExtractKernel.h new file mode 100644 index 0000000000..0715e1f8cb --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEChannelExtractKernel.h @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NECHANNELEXTRACTKERNEL_H__ +#define __ARM_COMPUTE_NECHANNELEXTRACTKERNEL_H__ + +#include "arm_compute/core/NEON/INESimpleKernel.h" +#include "arm_compute/core/Types.h" + +#include <cstdint> + +namespace arm_compute +{ +class IMultiImage; +class ITensor; +using IImage = ITensor; + +/** Interface for the channel extract kernel */ +class NEChannelExtractKernel : public INESimpleKernel +{ +public: + /** Default constructor */ + NEChannelExtractKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEChannelExtractKernel(const NEChannelExtractKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEChannelExtractKernel &operator=(const NEChannelExtractKernel &) = delete; + /** Allow instances of this class to be moved */ + NEChannelExtractKernel(NEChannelExtractKernel &&) = default; + /** Allow instances of this class to be moved */ + NEChannelExtractKernel &operator=(NEChannelExtractKernel &&) = default; + /** Default destructor */ + ~NEChannelExtractKernel() = default; + + /** Set the input and output of the kernel + * + * @param[in] input Source tensor. Formats supported: RGB888/RGBA8888/YUYV422/UYVY422 + * @param[in] channel Channel to extract. + * @param[out] output Destination tensor. Format supported: U8 + */ + void configure(const ITensor *input, Channel channel, ITensor *output); + /** Set the input and output of the kernel + * + * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV/YUV444 + * @param[in] channel Channel to extract. + * @param[out] output Single-planar destination image. Format supported: U8 + */ + void configure(const IMultiImage *input, Channel channel, IImage *output); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + /** Extract one channel from a two channel planar tensor. + * + * @param[in] win Region on which to execute the kernel. 
+ */ + void extract_1C_from_2C_img(const Window &win); + /** Extract one channel from a three channel planar tensor. + * + * @param[in] win Region on which to execute the kernel. + */ + void extract_1C_from_3C_img(const Window &win); + /** Extract one channel from a four channel planar tensor. + * + * @param[in] win Region on which to execute the kernel. + */ + void extract_1C_from_4C_img(const Window &win); + /** Extract U/V channel from a single planar YUVY/UYVY tensor. + * + * @param[in] win Region on which to execute the kernel. + */ + void extract_YUYV_uv(const Window &win); + /** Copies a full plane to the output tensor. + * + * @param[in] win Region on which to execute the kernel. + */ + void copy_plane(const Window &win); + /** Common signature for all the specialised ChannelExtract functions + * + * @param[in] window Region on which to execute the kernel. + */ + using ChannelExtractFunction = void (NEChannelExtractKernel::*)(const Window &window); + /** ChannelExtract function to use for the particular tensor types passed to configure() */ + ChannelExtractFunction _func; + unsigned int _lut_index; +}; +} +#endif /* __ARM_COMPUTE_NECHANNELEXTRACTKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NECol2ImKernel.h b/arm_compute/core/NEON/kernels/NECol2ImKernel.h new file mode 100644 index 0000000000..f6bc2152da --- /dev/null +++ b/arm_compute/core/NEON/kernels/NECol2ImKernel.h @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NECOL2IMKERNEL_H__ +#define __ARM_COMPUTE_NECOL2IMKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform col2im reshaping. + * + * Rearranges each matrix column into image blocks. It's the inverse operation of @ref NEIm2ColKernel. 
+ * + * For example, a vector of 9 elements can be reshaped to a block(image) of 3x3: + * + * @f[ + * \left( \begin{array}{ccccccccc} + * a0 & a1 & a2 & a3 & a4 & a5 & a6 & a7 & a8 \\ + * \end{array} \right) + * \rightarrow + * \left( \begin{array}{ccc} + * a0 & a1 & a2 \\ + * a3 & a4 & a5 \\ + * a6 & a7 & a8 \\ + * \end{array} \right) + * @f] + */ +class NECol2ImKernel : public INEKernel +{ +public: + /** Default constructor */ + NECol2ImKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NECol2ImKernel(const NECol2ImKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NECol2ImKernel &operator=(const NECol2ImKernel &) = delete; + /** Allow instances of this class to be moved */ + NECol2ImKernel(NECol2ImKernel &&) = default; + /** Allow instances of this class to be moved */ + NECol2ImKernel &operator=(NECol2ImKernel &&) = default; + /** Default destructor */ + ~NECol2ImKernel() = default; + + /** Set the input and output of the kernel. + * + * @param[in] input The input tensor to convert. Data types supported: U8/S8/QS8/U16/S16/QS16/F16/U32/S32/F32 + * @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM], + * while the rest represent batch of outputs. Data types supported: Same as @p input + * @param[in] convolved_dims Output convolved dimensions. + */ + void configure(const ITensor *input, ITensor *output, std::pair<unsigned int, unsigned int> convolved_dims); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + /** Template function to run the col2im + * + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). + */ + template <typename T> + void run_col2im(const Window &window); + + /** Common signature for all the specialised col2im functions + * + * @param[in] window Region on which to execute the kernel. 
+ */ + using Col2ImFunctionPtr = void (NECol2ImKernel::*)(const Window &window); + + Col2ImFunctionPtr _func; + const ITensor *_input; + ITensor *_output; + std::pair _convolved_dims; +}; +} + +#endif /*__ARM_COMPUTE_NECOL2IMKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEColorConvertKernel.h b/arm_compute/core/NEON/kernels/NEColorConvertKernel.h new file mode 100644 index 0000000000..2297218117 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEColorConvertKernel.h @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_COLORCONVERTKERNEL_H__ +#define __ARM_COMPUTE_COLORCONVERTKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class IMultiImage; +class ITensor; +using IImage = ITensor; + +/** Interface for the color convert kernel */ +class NEColorConvertKernel : public INEKernel +{ +public: + /** Default constructor */ + NEColorConvertKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEColorConvertKernel(const NEColorConvertKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEColorConvertKernel &operator=(const NEColorConvertKernel &) = delete; + /** Allow instances of this class to be moved */ + NEColorConvertKernel(NEColorConvertKernel &&) = default; + /** Allow instances of this class to be moved */ + NEColorConvertKernel &operator=(NEColorConvertKernel &&) = default; + /** Default destructor */ + ~NEColorConvertKernel() = default; + + /** Set the input and output of the kernel + * + * @param[in] input Source tensor. Formats supported: RGBA8888/UYVY422/YUYV422/RGB888 + * @param[out] output Destination tensor. Formats supported: RGB888 (if the formats of @p input are RGBA8888/UYVY422/YUYV422), + * RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888/) + */ + void configure(const ITensor *input, ITensor *output); + /** Set the input and output of the kernel + * + * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV + * @param[out] output Single-planar destination image. Formats supported: RGB888/RGBA8888 + */ + void configure(const IMultiImage *input, IImage *output); + /** Set the input and output of the kernel + * + * @param[in] input Single-planar source image. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422 + * @param[out] output Multi-planar destination image. 
Formats supported: NV12/IYUV/YUV444 (if the formats of @p input are RGB888/RGB8888) + */ + void configure(const IImage *input, IMultiImage *output); + /** Set the input and output of the kernel + * + * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV + * @param[out] output Multi-planar destination image. Formats supported: YUV444/IYUV (if the formats of @p input are NV12/NV21)/NV12 (if the format of @p input is IYUV) + */ + void configure(const IMultiImage *input, IMultiImage *output); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + using ColorConvertFunction = void(const void *__restrict input_ptr, void *__restrict output_ptr, const Window &win); + const void *_input; + void *_output; + ColorConvertFunction *_func; +}; +} +#endif /*__ARM_COMPUTE_NECOLORCONVERTKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEConvolutionKernel.h b/arm_compute/core/NEON/kernels/NEConvolutionKernel.h new file mode 100644 index 0000000000..588a228a5d --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEConvolutionKernel.h @@ -0,0 +1,251 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NECONVOLUTIONKERNEL_H__ +#define __ARM_COMPUTE_NECONVOLUTIONKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/NEON/INESimpleKernel.h" + +#include +#include +#include + +namespace arm_compute +{ +class ITensor; + +/****************************************************************************************\ + * Square Convolution * +\****************************************************************************************/ + +/** Interface for the kernel to run an arbitrary size convolution on a tensor. (Currently supports 3x3, 5x5, 7x7 and 9x9). + * The client can supply a convolution matrix \f$ C_{m,n} \f$. + * @f{eqnarray}{ + * k_0 &=& \frac{m}{2} \\ + * l_0 &=& \frac{n}{2} \\ + * sum &=& \sum_{k=0,l=0}^{k=m-1,l=n-1} input(x+k-k_0, y+l-l_0) C_{k,l} + * @f} + * + * @note The above equation for this function is similar to the default OpenCV Filter2D function, + * which actually computes a correlation and not a convolution. + * In case of a real convolution the convolution matrix should be flipped both horizontally and vertically. + */ +template +class NEConvolutionKernel : public INESimpleKernel +{ +public: + /** Default constructor */ + NEConvolutionKernel(); + /** Initialise the kernel's input, output and border mode. + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output Destination tensor. Data types supported: U8, S16. 
+ * @param[in] conv Convolution matrix to apply to the input tensor.
+ * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, bool border_undefined);
+
+ // Inherited methods overridden:
+ void run(const Window &window) override;
+ BorderSize border_size() const override;
+
+private:
+ template
+ void convolution(const Window &win);
+
+protected:
+ uint32_t _scale; /**< scale of the convolution */
+ std::array _convolution; /**< convolution matrix */
+};
+
+/** Interface for the kernel which applies a 3x3 convolution to a tensor.*/
+using NEConvolution3x3Kernel = NEConvolutionKernel<3>;
+/** Interface for the kernel which applies a 5x5 convolution to a tensor.*/
+using NEConvolution5x5Kernel = NEConvolutionKernel<5>;
+/** Interface for the kernel which applies a 7x7 convolution to a tensor.*/
+using NEConvolution7x7Kernel = NEConvolutionKernel<7>;
+/** Interface for the kernel which applies a 9x9 convolution to a tensor.*/
+using NEConvolution9x9Kernel = NEConvolutionKernel<9>;
+
+/****************************************************************************************\
+ * Separable Square Convolution *
+\****************************************************************************************/
+
+/** Kernel for the Horizontal pass of a Separable Convolution */
+template
+class NESeparableConvolutionHorKernel : public INESimpleKernel
+{
+public:
+ /** Default constructor */
+ NESeparableConvolutionHorKernel();
+
+ /** Initialise the kernel's input, output and border mode.
+ *
+ * @param[in] input Source tensor. Data type supported: U8.
+ * @param[out] output Destination tensor. Data types supported: U16, S16, S32.
+ * @param[in] conv_row Convolution matrix to apply to the input tensor.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ITensor *input, ITensor *output, const int16_t *conv_row, bool border_undefined);
+
+ // Inherited methods overridden:
+ void run(const Window &window) override;
+ BorderSize border_size() const override;
+
+private:
+ /** Apply the object's convolution to the given window of the input tensor.
+ *
+ * @param[in] window Window to apply the convolution on.
+ */
+ template
+ void convolve(const Window &window);
+
+ std::array _conv_row; /**< Convolution coefficients */
+ BorderSize _border_size; /**< Border size */
+};
+
+/** Interface for the kernel which applies a 5x1 horizontal convolution to a tensor.*/
+using NESeparableConvolution5x5HorKernel = NESeparableConvolutionHorKernel<5>;
+/** Interface for the kernel which applies a 7x1 horizontal convolution to a tensor.*/
+using NESeparableConvolution7x7HorKernel = NESeparableConvolutionHorKernel<7>;
+/** Interface for the kernel which applies a 9x1 horizontal convolution to a tensor.*/
+using NESeparableConvolution9x9HorKernel = NESeparableConvolutionHorKernel<9>;
+
+/** Kernel for the Vertical pass of a Separable Convolution */
+template
+class NESeparableConvolutionVertKernel : public INESimpleKernel
+{
+public:
+ /** Default constructor */
+ NESeparableConvolutionVertKernel();
+
+ /** Initialise the kernel's input, output and border mode.
+ *
+ * @param[in] input Source tensor. Data type supported: U16, S16, S32.
+ * @param[out] output Destination tensor. Data types supported: U8, S16.
+ * @param[in] conv_col Convolution matrix to apply to the input tensor.
+ * @param[in] scale Scale of the convolution matrix
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */ + void configure(const ITensor *input, ITensor *output, const int16_t *conv_col, uint32_t scale, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + /** Apply the object's convolution to the given window of the input tensor. + * This function is used if the intermediate values have been stored as U16. + * + * @param[in] win Window to apply the convolution on. + */ + template + void convolution_u16(const Window &win); + /** Apply the object's convolution to the given window of the input tensor. + * This function is used if the intermediate values have been stored as S16. + * + * @param[in] win Window to apply the convolution on. + */ + template + void convolution_s16(const Window &win); + /** Apply the object's convolution to the given window of the input tensor. + * This function is used if the intermediate values have been stored as S32. + * + * @param[in] win Window to apply the convolution on. + */ + template + void convolution_s32(const Window &win); + + std::array _conv_col; /**< Convolution coefficients */ + uint32_t _scale; /**< Convolution's scale */ +}; + +/** Interface for the kernel which applied a 1x5 vertical convolution to a tensor.*/ +using NESeparableConvolution5x5VertKernel = NESeparableConvolutionVertKernel<5>; +/** Interface for the kernel which applied a 1x7 vertical convolution to a tensor.*/ +using NESeparableConvolution7x7VertKernel = NESeparableConvolutionVertKernel<7>; +/** Interface for the kernel which applied a 1x9 vertical convolution to a tensor.*/ +using NESeparableConvolution9x9VertKernel = NESeparableConvolutionVertKernel<9>; + +/****************************************************************************************\ + * Rectangle Convolution * +\****************************************************************************************/ + +/** Kernel for the running convolution on a rectangle matrix. 
+ * + * @note Supports combinations of 3,5,7 and 9. + */ +class NEConvolutionRectangleKernel : public INEKernel +{ +public: + /** Default constructor */ + NEConvolutionRectangleKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvolutionRectangleKernel(NEConvolutionRectangleKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvolutionRectangleKernel &operator=(NEConvolutionRectangleKernel &) = delete; + /** Allow instances of this class to be moved */ + NEConvolutionRectangleKernel(NEConvolutionRectangleKernel &&) = default; + /** Allow instances of this class to be moved */ + NEConvolutionRectangleKernel &operator=(NEConvolutionRectangleKernel &&) = default; + /** Initialise the kernel's input, output and border mode. + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output Destination tensor, Data types supported: U8, S16. + * @param[in] conv Convolution matrix to apply to the input tensor. + * @param[in] width Width of convolution matrix (Number of columns) + * @param[in] height Height of convolution matrix (Number of rows) + * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ITensor *input, ITensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + unsigned int get_index(uint32_t val); + /** Apply the object's convolution to the given window of the input tensor. + * + * @param[in] win Window to apply the convolution on. 
+ */ + template + void convolution(const Window &win); + +protected: + const ITensor *_input; /**< Input tensor */ + ITensor *_output; /**< Output tensor */ + uint32_t _scale; /**< Scale of the convolution */ + std::vector _convolution; /**< Convolution matrix */ + BorderSize _border_size; /**< Calculated border width */ + uint32_t _func_idx; /**< Index used to specify convolution function to be used */ + const static unsigned int _nr_supported_sizes + { + 4 + }; /**< Number of supported permutations */ +}; +} +#endif /*__ARM_COMPUTE_NECONVOLUTIONKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h b/arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h new file mode 100644 index 0000000000..67b8c6052d --- /dev/null +++ b/arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NECUMULATIVEDISTRIBUTIONKERNEL_H__
+#define __ARM_COMPUTE_NECUMULATIVEDISTRIBUTIONKERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+
+#include
+
+namespace arm_compute
+{
+class IDistribution1D;
+class ILut;
+class ITensor;
+using IImage = ITensor;
+
+/** Interface for the cumulative distribution (cumulative summation) calculation kernel.
+ *
+ * This kernel calculates the cumulative sum of a given distribution (meaning that each output element
+ * is the sum of all its previous elements including itself) and creates a lookup table with the normalized
+ * pixel intensities which is used to improve the contrast of the image.
+ */
+class NECumulativeDistributionKernel : public INEKernel
+{
+public:
+ /** Default constructor */
+ NECumulativeDistributionKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NECumulativeDistributionKernel(const NECumulativeDistributionKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NECumulativeDistributionKernel &operator=(const NECumulativeDistributionKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NECumulativeDistributionKernel(NECumulativeDistributionKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NECumulativeDistributionKernel &operator=(NECumulativeDistributionKernel &&) = default;
+ /** Set the input and output distribution.
+ *
+ * @param[in] input Input image. Data type supported: U8
+ * @param[in] distribution Unnormalized 256-bin distribution of the input image.
+ * @param[out] cumulative_sum Cumulative distribution (Summed histogram).
Should be same size as @p distribution. + * @param[out] output Equalization lookup table. Should consist of 256 entries of U8 elements. + */ + void configure(const IImage *input, const IDistribution1D *distribution, IDistribution1D *cumulative_sum, ILut *output); + + // Inherited methods overridden: + void run(const Window &window) override; + bool is_parallelisable() const override; + +private: + const IImage *_input; /**< Input image. */ + const IDistribution1D *_distribution; /**< Input histogram of the input image. */ + IDistribution1D *_cumulative_sum; /**< The cummulative distribution. */ + ILut *_output; /**< Output with the equalization lookup table. */ +private: + static const uint32_t _histogram_size = 256; /**< Default histogram size of 256. */ +}; +} + +#endif /*__ARM_COMPUTE_NECUMULATIVEDISTRIBUTIONKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEDepthConcatenateKernel.h b/arm_compute/core/NEON/kernels/NEDepthConcatenateKernel.h new file mode 100644 index 0000000000..7384cd1f02 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEDepthConcatenateKernel.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __ARM_COMPUTE_NEDEPTHCONCATENATEKERNEL_H__ +#define __ARM_COMPUTE_NEDEPTHCONCATENATEKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the depth concatenate kernel. + * The input tensor will be concatenated into the output tensor. + */ +class NEDepthConcatenateKernel : public INEKernel +{ +public: + /** Default constructor */ + NEDepthConcatenateKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDepthConcatenateKernel(const NEDepthConcatenateKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDepthConcatenateKernel &operator=(const NEDepthConcatenateKernel &) = delete; + /** Allow instances of this class to be moved */ + NEDepthConcatenateKernel(NEDepthConcatenateKernel &&) = default; + /** Allow instances of this class to be moved */ + NEDepthConcatenateKernel &operator=(NEDepthConcatenateKernel &&) = default; + /** Default destructor */ + ~NEDepthConcatenateKernel() = default; + /** Initialise the kernel's inputs and output + * + * @param[in] input Input tensor. Data types supported: F32. + * @param[in] depth_offset The offset on the Z axis. + * @param[in,out] output Output tensor. Data types supported: F32. + * + * @note: The output tensor's low two dimensions can't be smaller than the input one's. 
+ * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2. + * + */ + void configure(const ITensor *input, unsigned int depth_offset, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + const ITensor *_input; + ITensor *_output; + int _top_bottom; + int _left_right; + unsigned int _depth_offset; +}; +} +#endif /* __ARM_COMPUTE_NEDEPTHCONCATENATEKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEDepthConvertKernel.h b/arm_compute/core/NEON/kernels/NEDepthConvertKernel.h new file mode 100644 index 0000000000..0c5c29e4db --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEDepthConvertKernel.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_DEPTHCONVERTKERNEL_H__ +#define __ARM_COMPUTE_DEPTHCONVERTKERNEL_H__ + +#include "arm_compute/core/NEON/INESimpleKernel.h" +#include "arm_compute/core/Types.h" + +#include + +namespace arm_compute +{ +class ITensor; + +/** Depth conversion kernel */ +class NEDepthConvertKernel : public INESimpleKernel +{ +public: + /** Default constructor*/ + NEDepthConvertKernel(); + /** Set the input and output of the kernel + * + * Valid conversions Input -> Output : + * + * - QS8 -> F32 + * - U8 -> U16, S16, S32 + * - U16 -> U8, U32 + * - S16 -> U8, S32 + * - F32 -> QS8 + * + * + * @param[in] input The input tensor to convert. Data types supported: U8/QS8/U16/S16/F32. + * @param[out] output The output tensor. Data types supported: U8/QS8/U16/S16/U32/S32/F32. + * @param[in] policy Conversion policy. + * @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8. + */ + void configure(const ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + ConvertPolicy _policy; + uint32_t _shift; +}; +} +#endif /*__ARM_COMPUTE_NEDEPTHCONVERTKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEDerivativeKernel.h b/arm_compute/core/NEON/kernels/NEDerivativeKernel.h new file mode 100644 index 0000000000..abb8a894c0 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEDerivativeKernel.h @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEDERIVATIVEKERNEL_H__ +#define __ARM_COMPUTE_NEDERIVATIVEKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to run the derivative along the X/Y directions on a tensor. 
+ * + */ +class NEDerivativeKernel : public INEKernel +{ +public: + /** Default constructor */ + NEDerivativeKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDerivativeKernel(const NEDerivativeKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDerivativeKernel &operator=(const NEDerivativeKernel &) = delete; + /** Allow instances of this class to be moved */ + NEDerivativeKernel(NEDerivativeKernel &&) = default; + /** Allow instances of this class to be moved */ + NEDerivativeKernel &operator=(NEDerivativeKernel &&) = default; + /** Initialise the kernel's sources, destination and border + * + * @note At least one of output_x or output_y must be set + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S16. + * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */
+ void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined);
+
+ // Inherited methods overridden:
+ void run(const Window &window) override;
+ BorderSize border_size() const override;
+
+private:
+ /** Function to perform derivative along the X direction on the given window
+ *
+ * @param[in] window Region on which to execute the kernel
+ */
+ void derivative_x(const Window &window);
+ /** Function to perform derivative along the Y direction on the given window
+ *
+ * @param[in] window Region on which to execute the kernel
+ */
+ void derivative_y(const Window &window);
+ /** Function to perform derivative along the X and Y direction on the given window
+ *
+ * @param[in] window Region on which to execute the kernel
+ */
+ void derivative_xy(const Window &window);
+ /** Common signature for all the specialised derivative functions
+ *
+ * @param[in] window Region on which to execute the kernel.
+ */
+ using DerivativeFunction = void (NEDerivativeKernel::*)(const Window &window);
+ /** Derivative function to use for the particular tensor types passed to configure() */
+ DerivativeFunction _func;
+
+private:
+ const ITensor *_input; /**< Input tensor */
+ ITensor *_output_x; /**< Output tensor - Derivative along the X direction */
+ ITensor *_output_y; /**< Output tensor - Derivative along the Y direction */
+};
+}
+#endif /* __ARM_COMPUTE_NEDERIVATIVEKERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NEDilateKernel.h b/arm_compute/core/NEON/kernels/NEDilateKernel.h
new file mode 100644
index 0000000000..05f148a1fd
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NEDilateKernel.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEDILATEKERNEL_H__ +#define __ARM_COMPUTE_NEDILATEKERNEL_H__ + +#include "arm_compute/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to perform boolean image dilatation */ +class NEDilateKernel : public INESimpleKernel +{ +public: + /** Set the source, destination and border mode of the kernel + * + * @param[in] input Source tensor. Data type supported: U8 + * @param[out] output Destination tensor. Data type supported: U8 + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */ + void configure(const ITensor *input, ITensor *output, bool border_undefined); + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; +}; +} +#endif /*__ARM_COMPUTE_NEDILATEKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h new file mode 100644 index 0000000000..f098e18655 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERBIASACCUMULATEKERNEL_H__ +#define __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERBIASACCUMULATEKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; +/** NEON kernel to accumulate the biases to each element of the input tensor + * + * @note We assume bias to be shared + */ +class NEDirectConvolutionLayerBiasAccumulateKernel : public INEKernel +{ +public: + /** Default constructor */ + NEDirectConvolutionLayerBiasAccumulateKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDirectConvolutionLayerBiasAccumulateKernel(const NEDirectConvolutionLayerBiasAccumulateKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDirectConvolutionLayerBiasAccumulateKernel &operator=(const NEDirectConvolutionLayerBiasAccumulateKernel &) = delete; + /** Allow instances of this class to be moved */ + NEDirectConvolutionLayerBiasAccumulateKernel(NEDirectConvolutionLayerBiasAccumulateKernel &&) = default; + /** Allow instances of this class to be moved */ + NEDirectConvolutionLayerBiasAccumulateKernel &operator=(NEDirectConvolutionLayerBiasAccumulateKernel &&) = default; + /** Default destructor */ + ~NEDirectConvolutionLayerBiasAccumulateKernel() = default; + /** Set the accumulate buffer and the biases of the kernel. + * + * @param[in, out] input Input to add the bias to. If @p output is not specified then accumulation is done in-place. + * Data type supported: QS8/F32 + * @param[in] bias The shared bias tensor to add. It must be 1D Tensor. Data type supported: Same as @p input + * @param[out] output (Optional) If the output tensor is specified the accumulation is done out-of-place. 
(Defaults to nullptr) + * Data type supported: Same as @p input + */ + void configure(ITensor *input, const ITensor *bias, ITensor *output = nullptr); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + using BiasAccumulateKernel = void(ITensor *input, const ITensor *bias, const Window window, ITensor *output); + +private: + BiasAccumulateKernel *_func; + ITensor *_input; + const ITensor *_bias; + ITensor *_output; +}; +} +#endif /*__ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERBIASACCUMULATEKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h new file mode 100644 index 0000000000..d726071606 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERKERNEL_H__ +#define __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON interface for Direct Convolution Layer kernel */ +class NEDirectConvolutionLayerKernel : public INEKernel +{ +public: + /** Default constructor */ + NEDirectConvolutionLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDirectConvolutionLayerKernel(const NEDirectConvolutionLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDirectConvolutionLayerKernel &operator=(const NEDirectConvolutionLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + NEDirectConvolutionLayerKernel(NEDirectConvolutionLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + NEDirectConvolutionLayerKernel &operator=(NEDirectConvolutionLayerKernel &&) = default; + /** Default destructor */ + ~NEDirectConvolutionLayerKernel() = default; + /** Set the input, weights and output tensors. + * + * @param[in] input Input tensor. Data types supported: QS8/F32. + * @param[in] weights Set of kernels to convolve the input volume. + * The 3rd dimension must be the same as the input's volume 3rd dimension. + * Data type supported: Same as @p input. + * @param[out] output Output tensor. + * The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: Same as @p input. + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. 
+ */ + void configure(const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + const ITensor *_input; + const ITensor *_weights; + ITensor *_output; + PadStrideInfo _conv_info; + BorderSize _border_size; + unsigned int _kernel_size; + unsigned int _num_elems_read_per_iteration; + unsigned int _num_elems_written_per_iteration; +}; +} +#endif /*__ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEErodeKernel.h b/arm_compute/core/NEON/kernels/NEErodeKernel.h new file mode 100644 index 0000000000..86dc217cc0 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEErodeKernel.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEERODEKERNEL_H__ +#define __ARM_COMPUTE_NEERODEKERNEL_H__ + +#include "arm_compute/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to perform boolean image erosion */ +class NEErodeKernel : public INESimpleKernel +{ +public: + /** Set the source, destination and border mode of the kernel + * + * @param[in] input Source tensor. Data type supported: U8 + * @param[out] output Destination tensor. Data type supported: U8 + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ITensor *input, ITensor *output, bool border_undefined); + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; +}; +} +#endif /*__ARM_COMPUTE_NEERODEKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEFastCornersKernel.h b/arm_compute/core/NEON/kernels/NEFastCornersKernel.h new file mode 100644 index 0000000000..d9bd6acde9 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEFastCornersKernel.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEFASTCORNERSKERNEL_H__ +#define __ARM_COMPUTE_NEFASTCORNERSKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/Types.h" + +#include <cstdint> + +namespace arm_compute +{ +class ITensor; +using IImage = ITensor; + +/** NEON kernel to perform fast corners */ +class NEFastCornersKernel : public INEKernel +{ +public: + /** Constructor */ + NEFastCornersKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFastCornersKernel(const NEFastCornersKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFastCornersKernel &operator=(const NEFastCornersKernel &) = delete; + /** Allow instances of this class to be moved */ + NEFastCornersKernel(NEFastCornersKernel &&) = default; + /** Allow instances of this class to be moved */ + NEFastCornersKernel &operator=(NEFastCornersKernel &&) = default; + /** Initialise the kernel. + * + * @param[in] input Source image. Data type supported: U8. + * @param[out] output Output image. Data type supported: U8. + * @param[in] threshold Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3. + * @param[in] non_max_suppression True if non-maxima suppression is applied, false otherwise. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */ + void configure(const IImage *input, IImage *output, uint8_t threshold, bool non_max_suppression, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + const IImage *_input; /**< source image */ + IImage *_output; /**< intermediate results */ + uint8_t _threshold; /**< threshold on difference between intensity */ + bool _non_max_suppression; /**< true if non-maxima suppression is applied in the next stage */ +}; +} +#endif diff --git a/arm_compute/core/NEON/kernels/NEFillArrayKernel.h b/arm_compute/core/NEON/kernels/NEFillArrayKernel.h new file mode 100644 index 0000000000..8e0846ea88 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEFillArrayKernel.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEFILLARRAYKERNEL_H__ +#define __ARM_COMPUTE_NEFILLARRAYKERNEL_H__ + +#include "arm_compute/core/IArray.h" +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/Types.h" + +#include <cstdint> + +namespace arm_compute +{ +class ITensor; +using IImage = ITensor; + +/** This kernel adds all texels greater than or equal to the threshold value to the keypoint array. */ +class NEFillArrayKernel : public INEKernel +{ +public: + /** Default constructor */ + NEFillArrayKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFillArrayKernel(const NEFillArrayKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFillArrayKernel &operator=(const NEFillArrayKernel &) = delete; + /** Allow instances of this class to be moved */ + NEFillArrayKernel(NEFillArrayKernel &&) = default; + /** Allow instances of this class to be moved */ + NEFillArrayKernel &operator=(NEFillArrayKernel &&) = default; + /** Default destructor */ + ~NEFillArrayKernel() = default; + + /** Initialise the kernel. + * + * @param[in] input Source image. Data type supported: U8. + * @param[in] threshold Texels greater than the threshold will be added to the array. + * @param[out] output Arrays of keypoints to store the results. + */ + void configure(const IImage *input, uint8_t threshold, IKeyPointArray *output); + + // Inherited methods overridden: + void run(const Window &window) override; + bool is_parallelisable() const override; + +private: + const IImage *_input; + IKeyPointArray *_output; + uint8_t _threshold; +}; +} +#endif diff --git a/arm_compute/core/NEON/kernels/NEFillBorderKernel.h b/arm_compute/core/NEON/kernels/NEFillBorderKernel.h new file mode 100644 index 0000000000..3ec66115e2 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEFillBorderKernel.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEFILLBORDERKERNEL_H__ +#define __ARM_COMPUTE_NEFILLBORDERKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to fill borders */ +class NEFillBorderKernel : public INEKernel +{ +public: + /** Default Constructor */ + NEFillBorderKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFillBorderKernel(const NEFillBorderKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFillBorderKernel &operator=(const NEFillBorderKernel &) = delete; + /** Allow instances of this class to be moved */ + NEFillBorderKernel(NEFillBorderKernel &&) = default; + /** Allow instances of this class to be moved */ + NEFillBorderKernel &operator=(NEFillBorderKernel &&) = default; + /** Default destructor */ + ~NEFillBorderKernel() = default; + + /** Initialise the function. + * + * @note This kernel fills the borders within the XY-planes. + * + * @param[in,out] tensor Tensor to process. Data types supported: U8/S8/QS8/QS16/S16/S32/F32. + * @param[in] border_size Size of the border to fill in elements. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
+ * + */ + void configure(ITensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue()); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + template + void fill_replicate_single_channel(const Window &window); + template + void fill_constant_value_single_channel(const Window &window); + + ITensor *_tensor; + BorderSize _border_size; + BorderMode _mode; + PixelValue _constant_border_value; +}; +} +#endif /*__ARM_COMPUTE_NEFILLBORDERKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEFillInnerBorderKernel.h b/arm_compute/core/NEON/kernels/NEFillInnerBorderKernel.h new file mode 100644 index 0000000000..61e6e46463 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEFillInnerBorderKernel.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEFILLINNERBORDERKERNEL_H__ +#define __ARM_COMPUTE_NEFILLINNERBORDERKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to fill the interior borders */ +class NEFillInnerBorderKernel : public INEKernel +{ +public: + /** Default constructor */ + NEFillInnerBorderKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFillInnerBorderKernel(const NEFillInnerBorderKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFillInnerBorderKernel &operator=(const NEFillInnerBorderKernel &) = delete; + /** Allow instances of this class to be moved */ + NEFillInnerBorderKernel(NEFillInnerBorderKernel &&) = default; + /** Allow instances of this class to be moved */ + NEFillInnerBorderKernel &operator=(NEFillInnerBorderKernel &&) = default; + /** Default destructor */ + ~NEFillInnerBorderKernel() = default; + + /** Initialise the function. + * + * @note This kernel fills the borders within the XY-planes. + * + * @param[in,out] input Tensor to process. Data types supported: U8/QS8/S16/S32/F32. + * @param[in] border_size Size of the border to fill in elements. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
+ * + */ + void configure(ITensor *input, BorderSize border_size, const PixelValue &constant_border_value = PixelValue()); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + template + void fill_value_single_channel(const Window &window); + + ITensor *_tensor; + BorderSize _border_size; + PixelValue _constant_border_value; +}; +} +#endif /*__ARM_COMPUTE_NEFILLINNERBORDERKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h b/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h new file mode 100644 index 0000000000..b9884ffb57 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEGEMMINTERLEAVE4x4KERNEL_H__ +#define __ARM_COMPUTE_NEGEMMINTERLEAVE4x4KERNEL_H__ + +#include "arm_compute/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to interleave the elements of a matrix + * + * This function puts the values in a 4x4 block of Matrix A on the same row (Interleaved values) + * + * @f[ + * \left( \begin{array}{cccc} + * a00 & a01 & a02 & a03 \\ + * a10 & a11 & a12 & a13 \\ + * a20 & a21 & a22 & a23 \\ + * a30 & a31 & a32 & a33 \\ + * \end{array} \right) + * \rightarrow + * \left( \begin{array}{ccccccccccccccccc} + * a00 & a10 & a20 & a30 & a01 & a11 & a21 & a31 & a02 & a12 & a22 & a32 & a03 & a13 & a23 & a33 \\ + * \end{array} \right) + * @f] + * + * After this operation, the output matrix will have the following shape: [ height * 4, ceil(width / 4.0f) ] + */ +class NEGEMMInterleave4x4Kernel : public INESimpleKernel +{ +public: + /** Constructor */ + NEGEMMInterleave4x4Kernel(); + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data types supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32 + * @param[out] output Output tensor which stores the interleaved matrix. Data type supported: same as @p input. + */ + void configure(const ITensor *input, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + /** Common signature for all the transpose functions + * + * @param[in] input An input tensor. Data types supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32 + * @param[out] output The output tensor. Data type supported: same as @p input + * @param[in] window Region on which to execute the kernel. 
+ */ + using GEMMInterleaveFunction = void(const ITensor *input, ITensor *output, const Window &window); + + GEMMInterleaveFunction *_func; /**< GEMM interleave function to use for the particular tensor types passed to configure() */ +}; +} +#endif /*__ARM_COMPUTE_NEGEMMINTERLEAVE4x4KERNEL_H__*/ diff --git a/arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h b/arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h new file mode 100644 index 0000000000..ba4dcc3373 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYKERNEL_H__ +#define __ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to multiply matrices + * + * @note @ref NEGEMMLowpMatrixMultiplyKernel low precision matrix product kernel + * This kernel performs the following computation: + * + * -# Convert a values from uint8 to int32 and add a_offset to each of them. + * -# Convert b values from uint8 to int32 and add b_offset to each of them. + * -# Compute the int32 matrix product of the resulting a * b. + * -# Add output_offset to each entry of the result. + * -# Multiply each entry of the result and round to the nearest integer + * -# Clamp the resulting int32 values to the [0..255] range and cast to uint8. + * + */ +class NEGEMMLowpMatrixMultiplyKernel : public INEKernel +{ +public: + /** Constructor */ + NEGEMMLowpMatrixMultiplyKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + NEGEMMLowpMatrixMultiplyKernel(const NEGEMMLowpMatrixMultiplyKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + NEGEMMLowpMatrixMultiplyKernel &operator=(const NEGEMMLowpMatrixMultiplyKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGEMMLowpMatrixMultiplyKernel(NEGEMMLowpMatrixMultiplyKernel &&) = default; + /** Allow instances of this class to be moved */ + NEGEMMLowpMatrixMultiplyKernel &operator=(NEGEMMLowpMatrixMultiplyKernel &&) = default; + /** Initialise the kernel's input and output. + * + * The input matrices @p input0 and @p input1 must be the output of the kernels: @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel. These two + * kernels change the layout of the original matrices to be more cache-friendly. + * + * @param[in] input0 Input tensor containing the interleaved Matrix A. 
 Data type supported: U8 + * @param[in] input1 Input tensor containing the transposed Matrix B. Data type supported: same as @p input0 + * @param[out] output Output tensor to store the result of matrix multiplication, Data type supported: same as @p input0 + * @param[in] a_offset Offset to be added to each element of the matrix A. + * @param[in] b_offset Offset to be added to each element of the matrix B. + * @param[in] output_offset Offset to be added to each element of the output matrix + * @param[in] output_mult_int Value to be multiplied to each entry of the result. + * @param[in] shift Number of bits to shift right the result. + */ + void configure(const ITensor *input0, const ITensor *input1, ITensor *output, int32_t a_offset, int32_t b_offset, int32_t output_offset, int32_t output_mult_int, int32_t shift); + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const ITensor *_input0; + const ITensor *_input1; + ITensor *_output; + int32_t _a_offset; + int32_t _b_offset; + int32_t _output_offset; + int32_t _output_mult_int; + int32_t _shift; +}; +} +#endif /*__ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYKERNEL_H__*/ diff --git a/arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h b/arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h new file mode 100644 index 0000000000..c0ecafcd39 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEGEMMMATRIXACCUMULATEBIASESKERNEL_H__ +#define __ARM_COMPUTE_NEGEMMMATRIXACCUMULATEBIASESKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; +/** NEON kernel to add a bias to each row of the input tensor */ +class NEGEMMMatrixAccumulateBiasesKernel : public INEKernel +{ +public: + /** Default constructor */ + NEGEMMMatrixAccumulateBiasesKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMMatrixAccumulateBiasesKernel(const NEGEMMMatrixAccumulateBiasesKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMMatrixAccumulateBiasesKernel &operator=(const NEGEMMMatrixAccumulateBiasesKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGEMMMatrixAccumulateBiasesKernel(NEGEMMMatrixAccumulateBiasesKernel &&) = default; + /** Allow instances of this class to be moved */ + NEGEMMMatrixAccumulateBiasesKernel &operator=(NEGEMMMatrixAccumulateBiasesKernel &&) = default; + /** Default destructor */ + ~NEGEMMMatrixAccumulateBiasesKernel() = default; + /** Set the accumulate buffer and the biases of the kernel. + * + * @param[in, out] accum The accumulate tensor to convert. Data type supported: QS8/F32 + * @param[in] biases The shared biases tensor to append. It must be 1D Tensor. 
Data type supported: Same as @p input + */ + void configure(ITensor *accum, const ITensor *biases); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + ITensor *_accum; + const ITensor *_biases; +}; +} +#endif /*__ARM_COMPUTE_NEGEMMMATRIXACCUMULATEBIASESKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h b/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h new file mode 100644 index 0000000000..1ab52fa2f2 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEGEMMMATRIXADDITIONKERNEL_H__ +#define __ARM_COMPUTE_NEGEMMMATRIXADDITIONKERNEL_H__ + +#include "arm_compute/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform the in-place matrix addition between 2 matrices taking into account that the second matrix might be weighted by a scalar value beta: + * + * @note [ MTX_OUT = MTX_0 + beta * MTX_1 ] with MTX_0 and MTX_1 of the same size + * + * @note This stage is used to finalize the GEMM result and it is computed if and only if beta != 0.0. In case this kernel is used for finalizing GEMM result, we have: + * - MTX_0 = A * B * alpha, where MTX_0 is the output of @ref NEGEMMMatrixMultiplyKernel + * - MTX_1 = C + */ +class NEGEMMMatrixAdditionKernel : public INESimpleKernel +{ +public: + /** Constructor */ + NEGEMMMatrixAdditionKernel(); + /** Prevent instances of this class from being copied */ + NEGEMMMatrixAdditionKernel(const NEGEMMMatrixAdditionKernel &) = delete; + /** Prevent instances of this class from being copied */ + NEGEMMMatrixAdditionKernel &operator=(const NEGEMMMatrixAdditionKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGEMMMatrixAdditionKernel(NEGEMMMatrixAdditionKernel &&) = default; + /** Allow instances of this class to be moved */ + NEGEMMMatrixAdditionKernel &operator=(NEGEMMMatrixAdditionKernel &&) = default; + /** Initialise the kernel's input and output. + * + * @note The input and output tensor must have the same dimensions + * + * @param[in] input Input tensor (Matrix C). Data types supported: QS8/F16/F32 + * @param[in, out] output Output tensor. If this kernel is used to finalize the GEMM result, output contains the result obtained by the kernel @ref NEGEMMMatrixMultiplyKernel. Data type supported: the same as @p input. 
+ * @param[in] beta Weight of matrix C + */ + void configure(const ITensor *input, ITensor *output, float beta); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + /** Common signature for all the matrix addition functions + * + * @param[in] input An input tensor. Data types supported: QS8/F16/F32 + * @param[out] output The output tensor. Data type supported: same as @p input + * @param[in] window Region on which to execute the kernel. + * @param[in] beta Weight of matrix C + */ + using MatrixAdditionFunction = void(const ITensor *input, ITensor *output, const Window &window, float beta); + /** Matrix addition function to use for the particular tensor types passed to configure() */ + MatrixAdditionFunction *_func; + float _beta; +}; +} +#endif /* __ARM_COMPUTE_NEGEMMMATRIXADDITIONKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h b/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h new file mode 100644 index 0000000000..a684945828 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEGEMMMATRIXMULTIPLYKERNEL_H__ +#define __ARM_COMPUTE_NEGEMMMATRIXMULTIPLYKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to multiply two input matrices "A" and "B". All elements of the output matrix/vector will be multiplied by alpha after the matrix multiplication + * + * @note If the output tensor is a matrix, the implementation assumes that the input tensors @p input0 and @p input1 are both matrices and reshaped respectively with @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel + * @note If the output tensor is a vector and the data type is F32, the implementation assumes that the first input tensor @p input0 is a vector and the second input tensor @p input1 a matrix. 
The implementation also assumes that both tensors have not been reshaped + * + */ +class NEGEMMMatrixMultiplyKernel : public INEKernel +{ +public: + /** Constructor */ + NEGEMMMatrixMultiplyKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMMatrixMultiplyKernel(const NEGEMMMatrixMultiplyKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMMatrixMultiplyKernel &operator=(const NEGEMMMatrixMultiplyKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGEMMMatrixMultiplyKernel(NEGEMMMatrixMultiplyKernel &&) = default; + /** Allow instances of this class to be moved */ + NEGEMMMatrixMultiplyKernel &operator=(NEGEMMMatrixMultiplyKernel &&) = default; + /** Initialise the kernel's input and output. + * + * @note If the output tensor is a matrix, the input matrices @p input0 and @p input1 should be the output of the kernels: @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel + * These two kernels change the layout of the original matrices to be more cache-friendly. + * + * @param[in] input0 Input tensor containing the interleaved Matrix A or the vector A. Data types supported: F16/F32 + * @param[in] input1 Input tensor containing the transposed Matrix B if the first input tensor A is not a vector. + * If the output tensor is a vector, input1 must contain the matrix B not reshaped. Data type supported: same as @p input0 + * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0. 
+ * @param[in] alpha Weight of the matrix product + */ + void configure(const ITensor *input0, const ITensor *input1, ITensor *output, float alpha); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const ITensor *_input0; + const ITensor *_input1; + ITensor *_output; + float _alpha; +}; +} +#endif /*__ARM_COMPUTE_NEGEMMMATRIXMULTIPLYKERNEL_H__*/ diff --git a/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h b/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h new file mode 100644 index 0000000000..5d8a3697cb --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEGEMMTRANSPOSE1xWKERNEL_H__ +#define __ARM_COMPUTE_NEGEMMTRANSPOSE1xWKERNEL_H__ + +#include "arm_compute/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel which transposes the elements of a matrix in chunks of 1xW, where W is equal to (16 / element size of the tensor) + * + * Following an example of how the transposition1xW works when the input data is F32 + * + * @f[ + * \left( \begin{array}{cccc} + * a00 & a01 & a02 & a03 \\ + * a10 & a11 & a12 & a13 \\ + * a20 & a21 & a22 & a23 \\ + * a30 & a31 & a32 & a33 \\ + * \end{array} \right) + * \rightarrow + * \left( \begin{array}{ccccccccccccccccc} + * a00 & a01 & a02 & a03 & a10 & a11 & a12 & a13 & a20 & a21 & a22 & a23 & a30 & a31 & a32 & a33 \\ + * \end{array} \right) + * @f] + * + * Following an example of how the transposition1xW works when the input data type is F16 + * + * @f[ + * \left( \begin{array}{cccccccc} + * a00 & a01 & a02 & a03 & a04 & a05 & a06 & a07 \\ + * a10 & a11 & a12 & a13 & a14 & a15 & a16 & a17 \\ + * a20 & a21 & a22 & a23 & a24 & a25 & a26 & a27 \\ + * a30 & a31 & a32 & a33 & a34 & a35 & a36 & a37 \\ + * \end{array} \right) + * \rightarrow + * \left( \begin{array}{cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc} + * a00 & a01 & a02 & a03 & a04 & a05 & a06 & a07 & a10 & a11 & a12 & a13 & a14 & a15 & a16 & a17 & a20 & a21 & a22 & a23 & a24 & a25 & a26 & a27 & a30 & a31 & a32 & a33 & a34 & a35 & a36 & a37\\ + * \end{array} \right) + * @f] + * + * @note The output matrix will have the following shape: [ height * W, ceil(width / W) ], where W = (16 / element size of the tensor) + * + */ +class NEGEMMTranspose1xWKernel : public INESimpleKernel +{ +public: + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data types supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32 + * @param[out] output Output tensor. Data type supported: same as @p input. 
+ */ + void configure(const ITensor *input, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; +}; +} +#endif /*__ARM_COMPUTE_NEGEMMTRANSPOSE1xWKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h b/arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h new file mode 100644 index 0000000000..763fab88f6 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEGAUSSIAN3x3KERNEL_H__ +#define __ARM_COMPUTE_NEGAUSSIAN3x3KERNEL_H__ + +#include "arm_compute/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform a Gaussian 3x3 filter */ +class NEGaussian3x3Kernel : public INESimpleKernel +{ +public: + /** Set the source, destination and border mode of the kernel + * + * @param[in] input Source tensor. Data type supported: U8 + * @param[out] output Destination tensor. Data type supported: S16 + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ITensor *input, ITensor *output, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; +}; +} +#endif /*__ARM_COMPUTE_NEGAUSSIAN3x3KERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h b/arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h new file mode 100644 index 0000000000..86b28907da --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEGAUSSIAN5x5KERNEL_H__ +#define __ARM_COMPUTE_NEGAUSSIAN5x5KERNEL_H__ + +#include "arm_compute/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform a Gaussian 5x5 filter (horizontal pass) */ +class NEGaussian5x5HorKernel : public INESimpleKernel +{ +public: + /** Default constructor */ + NEGaussian5x5HorKernel(); + + /** Initialise the kernel's source, destination and border mode. + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output Destination tensor. Data type supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ITensor *input, ITensor *output, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + BorderSize _border_size; +}; + +/** NEON kernel to perform a Gaussian 5x5 filter (vertical pass) */ +class NEGaussian5x5VertKernel : public INESimpleKernel +{ +public: + /** Initialise the kernel's source, destination and border mode. + * + * @param[in] input Source tensor. Data type supported: S16. + * @param[out] output Destination tensor, Data type supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */ + void configure(const ITensor *input, ITensor *output, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; +}; +} +#endif /*__ARM_COMPUTE_NEGAUSSIAN5x5KERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h b/arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h new file mode 100644 index 0000000000..40a6aa7375 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEGAUSSIANPYRAMIDKERNEL_H__ +#define __ARM_COMPUTE_NEGAUSSIANPYRAMIDKERNEL_H__ + +#include "arm_compute/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform a GaussianPyramid (horizontal pass) */ +class NEGaussianPyramidHorKernel : public INESimpleKernel +{ +public: + /** Default constructor */ + NEGaussianPyramidHorKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGaussianPyramidHorKernel(NEGaussianPyramidHorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGaussianPyramidHorKernel &operator=(NEGaussianPyramidHorKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGaussianPyramidHorKernel(NEGaussianPyramidHorKernel &&) = default; + /** Allow instances of this class to be moved */ + NEGaussianPyramidHorKernel &operator=(NEGaussianPyramidHorKernel &&) = default; + /** Default destructor */ + ~NEGaussianPyramidHorKernel() = default; + + /** Initialise the kernel's source, destination and border mode. + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output Destination tensor. Data type supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */ + void configure(const ITensor *input, ITensor *output, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + BorderSize _border_size; + int _l2_load_offset; +}; + +/** NEON kernel to perform a GaussianPyramid (vertical pass) */ +class NEGaussianPyramidVertKernel : public INESimpleKernel +{ +public: + /** Default constructor */ + NEGaussianPyramidVertKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGaussianPyramidVertKernel(NEGaussianPyramidVertKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGaussianPyramidVertKernel &operator=(NEGaussianPyramidVertKernel &) = delete; + /** Allow instances of this class to be moved */ + NEGaussianPyramidVertKernel(NEGaussianPyramidVertKernel &&) = default; + /** Allow instances of this class to be moved */ + NEGaussianPyramidVertKernel &operator=(NEGaussianPyramidVertKernel &&) = default; + /** Default destructor */ + ~NEGaussianPyramidVertKernel() = default; + + /** Initialise the kernel's source, destination and border mode. + * + * @param[in] input Source tensor. Data type supported: S16. + * @param[out] output Destination tensor. Data type supported: U8. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */ + void configure(const ITensor *input, ITensor *output, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + int _t2_load_offset; +}; +} +#endif /*__ARM_COMPUTE_NEGAUSSIANPYRAMIDKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h b/arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h new file mode 100644 index 0000000000..dd85778b8a --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEHOGDESCRIPTORKERNEL_H__ +#define __ARM_COMPUTE_NEHOGDESCRIPTORKERNEL_H__ + +#include "arm_compute/core/IHOG.h" +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/Size2D.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform HOG Orientation Binning */ +class NEHOGOrientationBinningKernel : public INEKernel +{ +public: + /** Default constructor */ + NEHOGOrientationBinningKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHOGOrientationBinningKernel(const NEHOGOrientationBinningKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHOGOrientationBinningKernel &operator=(const NEHOGOrientationBinningKernel &) = delete; + /** Allow instances of this class to be moved */ + NEHOGOrientationBinningKernel(NEHOGOrientationBinningKernel &&) = default; + /** Allow instances of this class to be moved */ + NEHOGOrientationBinningKernel &operator=(NEHOGOrientationBinningKernel &&) = default; + /** Default destructor */ + ~NEHOGOrientationBinningKernel() = default; + + /** Initialise the kernel's inputs, output and HOG's metadata + * + * @param[in] input_magnitude Input tensor which stores the magnitude of the gradient for each pixel. Data type supported: S16. + * @param[in] input_phase Input tensor which stores the phase of the gradient for each pixel. Data type supported: U8 + * @param[out] output Output tensor which stores the local HOG for each cell. Data type supported: F32. 
Number of channels supported: equal to the number of histogram bins per cell + * @param[in] hog_info HOG's metadata + */ + void configure(const ITensor *input_magnitude, const ITensor *input_phase, ITensor *output, const HOGInfo *hog_info); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + /** Common signature for all the specialised block normalization functions + * + * @param[in] mag_row_ptr Pointer to the first row of the cell in the magnitude tensor + * @param[in] phase_row_ptr Pointer to the first row of the cell in the phase tensor + * @param[out] output_ptr Pointer to the output cell of hog space tensor + * @param[in] mag_stride Stride of the magnitude tensor + * @param[in] phase_stride Stride of the phase tensor + * @param[in] cell_width Width of the cell + * @param[in] cell_height Height of the cell + * @param[in] num_bins Number of bins for each cell + * @param[in] phase_scale Scale factor to apply to the phase in order to calculate the histogram index + */ + using OrientBinFunc = void(const int16_t *__restrict mag_row_ptr, const uint8_t *__restrict phase_row_ptr, float *__restrict output_ptr, size_t mag_stride, size_t phase_stride, size_t cell_width, + size_t cell_height, size_t num_bins, float phase_scale); + /** Orientation binning function to use for the particular cell width passed to configure() */ + OrientBinFunc *_func; + const ITensor *_input_magnitude; + const ITensor *_input_phase; + ITensor *_output; + size_t _cell_width; + size_t _cell_height; + size_t _num_bins; + float _phase_scale; +}; + +/** NEON kernel to perform HOG block normalization */ +class NEHOGBlockNormalizationKernel : public INEKernel +{ +public: + /** Default constructor */ + NEHOGBlockNormalizationKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHOGBlockNormalizationKernel(const NEHOGBlockNormalizationKernel &) = delete; + /** Prevent instances of this class from being copied 
(As this class contains pointers) */ + NEHOGBlockNormalizationKernel &operator=(const NEHOGBlockNormalizationKernel &) = delete; + /** Allow instances of this class to be moved */ + NEHOGBlockNormalizationKernel(NEHOGBlockNormalizationKernel &&) = default; + /** Allow instances of this class to be moved */ + NEHOGBlockNormalizationKernel &operator=(NEHOGBlockNormalizationKernel &&) = default; + /** Default destructor */ + ~NEHOGBlockNormalizationKernel() = default; + + /** Initialise the kernel's input, output and HOG's metadata + * + * @param[in] input Input tensor which stores the local HOG for each cell. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per cell + * @param[out] output Output tensor which stores the normalised blocks. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block + * @param[in] hog_info HOG's metadata + */ + void configure(const ITensor *input, ITensor *output, const HOGInfo *hog_info); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + /** Common signature for all the specialised block normalization functions + * + * @param[in] input_row_ptr Pointer to the first row of the block in the input hog space tensor + * @param[out] output_ptr Pointer to the output block of the hog normalized space + * @param[in] input_stride Stride of the input hog space tensor + * @param[in] num_cells_per_block_height Number of cells per block along the Y direction + * @param[in] num_bins_block_x Number of bins per block along the X direction + * @param[in] num_bins_block Number of total bins per block + * @param[in] l2_hyst_threshold Threshold to use for l2 hysteresis normalization + */ + using BlockNormFunc = void(const float *input_row_ptr, float *output_ptr, size_t input_stride, size_t num_cells_per_block_height, size_t num_bins_block_x, size_t num_bins_block, + float l2_hyst_threshold); + /** Block normalization function 
to use for the particular normalization type passed to configure() */ + BlockNormFunc *_func; + const ITensor *_input; + ITensor *_output; + Size2D _num_cells_per_block; + Size2D _num_cells_per_block_stride; + size_t _num_bins; + float _l2_hyst_threshold; +}; +} +#endif /* __ARM_COMPUTE_NEHOGDESCRIPTORKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h b/arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h new file mode 100644 index 0000000000..e56d1e5fd8 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEHOGDETECTORKERNEL_H__ +#define __ARM_COMPUTE_NEHOGDETECTORKERNEL_H__ + +#include "arm_compute/core/IArray.h" +#include "arm_compute/core/IHOG.h" +#include "arm_compute/core/NEON/INEKernel.h" + +#include + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform HOG detector kernel using linear SVM */ +class NEHOGDetectorKernel : public INEKernel +{ +public: + /** Default constructor */ + NEHOGDetectorKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHOGDetectorKernel(const NEHOGDetectorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHOGDetectorKernel &operator=(const NEHOGDetectorKernel &) = delete; + /** Allow instances of this class to be moved */ + NEHOGDetectorKernel(NEHOGDetectorKernel &&) = default; + /** Allow instances of this class to be moved */ + NEHOGDetectorKernel &operator=(NEHOGDetectorKernel &&) = default; + /** Default destructor */ + ~NEHOGDetectorKernel() = default; + + /** Initialise the kernel's input, HOG data-object, detection window, the stride of the detection window, the threshold and index of the object to detect + * + * @param[in] input Input tensor which stores the HOG descriptor obtained with @ref NEHOGOrientationBinningKernel. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block + * @param[in] hog HOG data object used by @ref NEHOGOrientationBinningKernel and @ref NEHOGBlockNormalizationKernel + * @param[out] detection_windows Array of @ref DetectionWindow. This array stores all the detected objects + * @param[in] detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions. 
+ * It must be multiple of the hog->info()->block_stride() + * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane + * @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to + */ + void configure(const ITensor *input, const IHOG *hog, IDetectionWindowArray *detection_windows, const Size2D &detection_window_stride, float threshold = 0.0f, uint16_t idx_class = 0); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const ITensor *_input; + IDetectionWindowArray *_detection_windows; + const float *_hog_descriptor; + float _bias; + float _threshold; + uint16_t _idx_class; + size_t _num_bins_per_descriptor_x; + size_t _num_blocks_per_descriptor_y; + size_t _block_stride_width; + size_t _block_stride_height; + size_t _detection_window_width; + size_t _detection_window_height; + size_t _max_num_detection_windows; + std::mutex _mutex; +}; +} + +#endif /* __ARM_COMPUTE_NEHOGDETECTORKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h b/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h new file mode 100644 index 0000000000..0abd73ef97 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEHARRISCORNERSKERNEL_H__ +#define __ARM_COMPUTE_NEHARRISCORNERSKERNEL_H__ + +#include "arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h" +#include "arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h" +#include "arm_compute/core/IArray.h" +#include "arm_compute/core/NEON/INEKernel.h" + +#include +#include + +namespace arm_compute +{ +class ITensor; +using IImage = ITensor; + +/** Common interface for all Harris Score kernels */ +class INEHarrisScoreKernel : public INEKernel +{ +public: + /** Default constructor */ + INEHarrisScoreKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + INEHarrisScoreKernel(const INEHarrisScoreKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + INEHarrisScoreKernel &operator=(const INEHarrisScoreKernel &) = delete; + /** Allow instances of this class to be moved */ + INEHarrisScoreKernel(INEHarrisScoreKernel &&) = default; + /** Allow instances of this class to be moved */ + INEHarrisScoreKernel &operator=(INEHarrisScoreKernel &&) = default; + /** Default destructor */ + ~INEHarrisScoreKernel() = default; + +public: + /** Setup the kernel parameters + * + * @param[in] input1 Source image (gradient X). Data types supported: S16/S32 + * @param[in] input2 Source image (gradient Y). Data types supported: same as @p input1 + * @param[out] output Destination image (harris score). 
Data types supported: F32 + * @param[in] norm_factor Normalization factor to use accordingly with the gradient size (Must be different from 0) + * @param[in] strength_thresh Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel). + * @param[in] sensitivity Sensitivity threshold k from the Harris-Stephens equation + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + virtual void configure(const IImage *input1, const IImage *input2, IImage *output, float norm_factor, float strength_thresh, float sensitivity, bool border_undefined) = 0; + +protected: + const IImage *_input1; /**< Source image - Gx component */ + const IImage *_input2; /**< Source image - Gy component */ + IImage *_output; /**< Source image - Harris score */ + float _sensitivity; /**< Sensitivity value */ + float _strength_thresh; /**< Threshold value */ + float _norm_factor; /**< Normalization factor */ + BorderSize _border_size; /**< Border size */ +}; + +/** Template NEON kernel to perform Harris Score. 
+ * The implementation supports 3, 5, and 7 for the block_size + */ +template +class NEHarrisScoreKernel : public INEHarrisScoreKernel +{ +public: + /** Default constructor */ + NEHarrisScoreKernel(); + // Inherited methods overridden: + void configure(const IImage *input1, const IImage *input2, IImage *output, float norm_factor, float strength_thresh, float sensitivity, bool border_undefined) override; + BorderSize border_size() const override; + void run(const Window &window) override; + +private: + /** Common signature for all the specialised harris score functions */ + using HarrisScoreFunction = void(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, int32_t input_stride, + float norm_factor, float sensitivity, float strength_thresh); + /** Harris Score function to use for the particular image types passed to configure() */ + HarrisScoreFunction *_func; +}; + +#ifdef ARM_COMPUTE_ENABLE_FP16 +/** Template NEON kernel to perform Harris Score using F16 */ +template +class NEHarrisScoreFP16Kernel : public INEHarrisScoreKernel +{ +public: + /** Default constructor */ + NEHarrisScoreFP16Kernel(); + // Inherited methods overridden: + void configure(const IImage *input1, const IImage *input2, IImage *output, float norm_factor, float strength_thresh, float sensitivity, bool border_undefined) override; + BorderSize border_size() const override; + void run(const Window &window) override; + +private: + using HarrisScoreFunction = void(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, int32_t input_stride, + float norm_factor, float sensitivity, float strength_thresh); + /** Harris Score function to use for the particular image types passed to configure() */ + HarrisScoreFunction *_func; +}; +#else +template +using NEHarrisScoreFP16Kernel = NEHarrisScoreKernel; +#endif +} +#endif /* __ARM_COMPUTE_NEHARRISCORNERSKERNEL_H__ */ diff --git 
a/arm_compute/core/NEON/kernels/NEHistogramKernel.h b/arm_compute/core/NEON/kernels/NEHistogramKernel.h new file mode 100644 index 0000000000..c4dbbeae83 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEHistogramKernel.h @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEHISTOGRAMKERNEL_H__ +#define __ARM_COMPUTE_NEHISTOGRAMKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +#include +#include +#include + +namespace arm_compute +{ +class IDistribution1D; +class ITensor; +using IImage = ITensor; + +/** Interface for the histogram kernel */ +class NEHistogramKernel : public INEKernel +{ +public: + /** Default constructor */ + NEHistogramKernel(); + /** Default destructor */ + ~NEHistogramKernel() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHistogramKernel(const NEHistogramKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHistogramKernel &operator=(const NEHistogramKernel &) = delete; + /** Allow instances of this class to be moved */ + NEHistogramKernel(NEHistogramKernel &&) = default; + /** Allow instances of this class to be moved */ + NEHistogramKernel &operator=(NEHistogramKernel &&) = default; + + /** Set the input image and the distribution output. + * + * @param[in] input Source image. Data type supported: U8. + * @param[out] output Destination distribution. + * @param[in,out] local_hist Array that the threads use to save their local histograms. + * Its size should be equal to (number_of_threads * num_bins), + * and the Window::thread_id() is used to determine the part of the array + * used by each thread. + * @param[out] window_lut LUT with pre-calculated possible window values. + * The size of the LUT should be equal to max_range_size and it will be filled + * during the configure stage, while it is re-used in every run, therefore can be + * safely shared among threads. + */ + void configure(const IImage *input, IDistribution1D *output, uint32_t *local_hist, uint32_t *window_lut); + /** Set the input image and the distribution output. + * + * @note Used for histogram of fixed size equal to 256 + * + * @param[in] input Source image. Data type supported: U8. 
+ * @param[out] output Destination distribution which must be of 256 bins.. + */ + void configure(const IImage *input, IDistribution1D *output); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + /** Function to merge multiple partial histograms. + * + * @param[out] global_hist Pointer to the final histogram. + * @param[in] local_hist Pointer to the partial histograms. + * @param[in] bins Number of bins. + */ + void merge_histogram(uint32_t *global_hist, const uint32_t *local_hist, size_t bins); + /** Function to merge multiple minimum values of partial histograms. + * + * @param[out] global_min Pointer to the global min value. + * @param[in] local_min Local min value. + */ + void merge_min(uint8_t *global_min, const uint8_t &local_min); + /** Function to perform histogram on the given window + * + * @param[in] win Region on which to execute the kernel + */ + void histogram_U8(Window win); + /** Function to perform histogram on the given window where histogram is + * of fixed size 256 without ranges and offsets. + * + * @param[in] win Region on which to execute the kernel + */ + void histogram_fixed_U8(Window win); + /** Pre-calculate the pixel windowing for every possible pixel + * + * Calculate (V - offset) * numBins / range where V is every possible pixel value. + * + * @note We currently support U8 image thus possible pixel values are between 0 and 255 + */ + void calculate_window_lut() const; + /** Common signature for all the specialised Histogram functions + * + * @param[in] window Region on which to execute the kernel. 
+ */ + using HistogramFunctionPtr = void (NEHistogramKernel::*)(Window window); + + HistogramFunctionPtr _func; ///< Histogram function to use for the particular image types passed to configure() + const IImage *_input; + IDistribution1D *_output; + uint32_t *_local_hist; + uint32_t *_window_lut; + std::mutex _hist_mtx; + static constexpr unsigned int _max_range_size{ 256 }; ///< 256 possible pixel values as we handle only U8 images +}; +} +#endif /*__ARM_COMPUTE_NEHISTOGRAMKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEIm2ColKernel.h b/arm_compute/core/NEON/kernels/NEIm2ColKernel.h new file mode 100644 index 0000000000..ebaafb467f --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEIm2ColKernel.h @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEIM2COLKERNEL_H__ +#define __ARM_COMPUTE_NEIM2COLKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the im2col reshape kernel. + * + * Rearranges image blocks into columns. It is used to strip out each convolution block to a single column. + * It is used to transform a convolution to a plain matrix multiplication. + * + * For example taking into account the image below and assuming 3x3 image blocks with stride of 1 we have: + * + * @f[ + * \left( \begin{array}{cccc} + * a00 & a01 & a02 & a03 \\ + * a10 & a11 & a12 & a13 \\ + * a20 & a21 & a22 & a23 \\ + * a30 & a31 & a32 & a33 \\ + * \end{array} \right) + * \rightarrow + * \left( \begin{array}{ccccccccc} + * a00 & a01 & a02 & a10 & a11 & a12 & a20 & a21 & a22 \\ + * a01 & a02 & a03 & a11 & a12 & a13 & a21 & a22 & a23 \\ + * a10 & a11 & a12 & a20 & a21 & a22 & a30 & a31 & a32 \\ + * a11 & a12 & a13 & a21 & a22 & a23 & a31 & a32 & a33 \\ + * \end{array} \right) + * @f] + */ +class NEIm2ColKernel : public INEKernel +{ +public: + /** Default constructor */ + NEIm2ColKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEIm2ColKernel(const NEIm2ColKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEIm2ColKernel &operator=(const NEIm2ColKernel &) = delete; + /** Allow instances of this class to be moved */ + NEIm2ColKernel(NEIm2ColKernel &&) = default; + /** Allow instances of this class to be moved */ + NEIm2ColKernel &operator=(NEIm2ColKernel &&) = default; + /** Default destructor */ + ~NEIm2ColKernel() = default; + + /** Set the input and output of the kernel. + * + * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. 
Data types supported: QS8/F32 + * @param[out] output The output tensor. Data types supported: Same as @p input + * @param[in] convolved_dims The convolved output dimensions. + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] has_bias In case biases are provided expands the matrix with 1. + */ + void configure(const ITensor *input, ITensor *output, std::pair convolved_dims, const PadStrideInfo &conv_info, bool has_bias); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + /** Template function to run the im2col optimised for the fully connected layer case + * + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). + */ + template + void run_reduced(const Window &window); + /** Template function to run the im2col used for the convolution layer case + * + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). + */ + template + void run_generic(const Window &window); + /** Common signature for all the specialised im2col functions + * + * @param[in] window Region on which to execute the kernel. + */ + using Im2ColFunctionPtr = void (NEIm2ColKernel::*)(const Window &window); + + Im2ColFunctionPtr _func; + const ITensor *_input; + ITensor *_output; + std::pair _convolved_dims; + PadStrideInfo _conv_info; + unsigned int _kernel_size; + bool _has_bias; +}; +} +#endif /*__ARM_COMPUTE_NEIM2COLKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEIntegralImageKernel.h b/arm_compute/core/NEON/kernels/NEIntegralImageKernel.h new file mode 100644 index 0000000000..13647889ab --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEIntegralImageKernel.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEINTEGRALIMAGEKERNEL_H__ +#define __ARM_COMPUTE_NEINTEGRALIMAGEKERNEL_H__ + +#include "arm_compute/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Kernel to perform an image integral on an image */ +class NEIntegralImageKernel : public INESimpleKernel +{ +public: + /** Set the source, destination and border mode of the kernel + * + * @param[in] input Source tensor. Data type supported: U8 + * @param[out] output Destination tensor. 
Data type supported: U32 + */ + void configure(const ITensor *input, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + bool is_parallelisable() const override; +}; +} +#endif /*__ARM_COMPUTE_NEINTEGRALIMAGEKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NELKTrackerKernel.h b/arm_compute/core/NEON/kernels/NELKTrackerKernel.h new file mode 100644 index 0000000000..9ab7f91092 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NELKTrackerKernel.h @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_LKTRACKERKERNEL_H__ +#define __ARM_COMPUTE_LKTRACKERKERNEL_H__ + +#include "arm_compute/core/IArray.h" +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/Types.h" + +#include +#include +#include +#include + +namespace arm_compute +{ +class ITensor; + +/** Internal keypoint class for Lucas-Kanade Optical Flow */ +struct NELKInternalKeypoint +{ + float x{ 0.f }; /**< x coordinate of the keypoint */ + float y{ 0.f }; /**< y coordinate of the keypoint */ + bool tracking_status{ false }; /**< the tracking status of the keypoint */ +}; + +using INELKInternalKeypointArray = IArray; + +/** Interface for the Lucas-Kanade tracker kernel */ +class NELKTrackerKernel : public INEKernel +{ +public: + /** Default constructor */ + NELKTrackerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELKTrackerKernel(const NELKTrackerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELKTrackerKernel &operator=(const NELKTrackerKernel &) = delete; + /** Allow instances of this class to be moved */ + NELKTrackerKernel(NELKTrackerKernel &&) = default; + /** Allow instances of this class to be moved */ + NELKTrackerKernel &operator=(NELKTrackerKernel &&) = default; + /** Default destructor */ + ~NELKTrackerKernel() = default; + + /** Initialise the kernel input and output + * + * @param[in] input_old Pointer to the input old tensor. Data type supported: U8 + * @param[in] input_new Pointer to the input new tensor. Data type supported. U8 + * @param[in] old_scharr_gx Pointer to the input scharr X tensor. Data type supported: S16 + * @param[in] old_scharr_gy Pointer to the input scharr Y tensor. 
+ Data type supported: S16 + * @param[in] old_points Pointer to the IKeyPointArray storing old key points + * @param[in] new_points_estimates Pointer to the IKeyPointArray storing new estimates key points + * @param[out] new_points Pointer to the IKeyPointArray storing new key points + * @param[in, out] old_points_internal Pointer to the array of NELKInternalKeypoint for old points + * @param[out] new_points_internal Pointer to the array of NELKInternalKeypoint for new points + * @param[in] termination The criteria to terminate the search of each keypoint. + * @param[in] use_initial_estimate The flag to indicate whether the initial estimated position should be used + * @param[in] epsilon The error for terminating the algorithm + * @param[in] num_iterations The maximum number of iterations before terminating the algorithm + * @param[in] window_dimension The size of the window on which to perform the algorithm + * @param[in] level The pyramid level + * @param[in] num_levels The number of pyramid levels + * @param[in] pyramid_scale Scale factor used for generating the pyramid + */ + void configure(const ITensor *input_old, const ITensor *input_new, const ITensor *old_scharr_gx, const ITensor *old_scharr_gy, + const IKeyPointArray *old_points, const IKeyPointArray *new_points_estimates, IKeyPointArray *new_points, + INELKInternalKeypointArray *old_points_internal, INELKInternalKeypointArray *new_points_internal, + Termination termination, bool use_initial_estimate, float epsilon, unsigned int num_iterations, size_t window_dimension, + size_t level, size_t num_levels, float pyramid_scale); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + /** Initialise the array of keypoints in the provided range + * + * @param[in] start Index of first element in the keypoints array to be initialised + * @param[in] end Index after last element in the keypoints array to be initialised + */ + void 
init_keypoints(int start, int end); + /** Compute the structure tensor A^T * A based on the scharr gradients I_x and I_y + * + * @param[in] keypoint Keypoint for which gradients are computed + * @param[out] bilinear_ix Intermediate interpolated data for X gradient + * @param[out] bilinear_iy Intermediate interpolated data for Y gradient + * + * @return Values A11, A12, A22 + */ + std::tuple compute_spatial_gradient_matrix(const NELKInternalKeypoint &keypoint, int *bilinear_ix, int *bilinear_iy); + /** Compute the vector A^T * b, i.e. -sum(I_d * I_t) for d in {x,y} + * + * @param[in] old_keypoint Old keypoint for which gradient is computed + * @param[in] new_keypoint New keypoint for which gradient is computed + * @param[in] bilinear_ix Intermediate interpolated data for X gradient + * @param[in] bilinear_iy Intermediate interpolated data for Y gradient + * + * @return Values b1, b2 + */ + std::pair compute_image_mismatch_vector(const NELKInternalKeypoint &old_keypoint, const NELKInternalKeypoint &new_keypoint, const int *bilinear_ix, const int *bilinear_iy); + + const ITensor *_input_old; + const ITensor *_input_new; + const ITensor *_old_scharr_gx; + const ITensor *_old_scharr_gy; + IKeyPointArray *_new_points; + const IKeyPointArray *_new_points_estimates; + const IKeyPointArray *_old_points; + INELKInternalKeypointArray *_old_points_internal; + INELKInternalKeypointArray *_new_points_internal; + Termination _termination; + bool _use_initial_estimate; + float _pyramid_scale; + float _epsilon; + unsigned int _num_iterations; + int _window_dimension; + unsigned int _level; + unsigned int _num_levels; + ValidRegion _valid_region; +}; +} +#endif /*__ARM_COMPUTE_NELKTRACKERKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h b/arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h new file mode 100644 index 0000000000..d4bff661f9 --- /dev/null +++ 
b/arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NELOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H__ +#define __ARM_COMPUTE_NELOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to multiply each row of first tensor with low 2 dimensions of second tensor. 
*/ +class NELocallyConnectedMatrixMultiplyKernel : public INEKernel +{ +public: + /** Default constructor */ + NELocallyConnectedMatrixMultiplyKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELocallyConnectedMatrixMultiplyKernel(const NELocallyConnectedMatrixMultiplyKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELocallyConnectedMatrixMultiplyKernel &operator=(const NELocallyConnectedMatrixMultiplyKernel &) = delete; + /** Allow instances of this class to be moved */ + NELocallyConnectedMatrixMultiplyKernel(NELocallyConnectedMatrixMultiplyKernel &&) = default; + /** Allow instances of this class to be moved */ + NELocallyConnectedMatrixMultiplyKernel &operator=(NELocallyConnectedMatrixMultiplyKernel &&) = default; + /** Initialise the kernel's input and output + * + * @param[in] input0 First input tensor. Data types supported: F32 + * @param[in] input1 Second input tensor containing the Matrix B. Data type supported: same as @p input0 + * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0 + */ + void configure(const ITensor *input0, const ITensor *input1, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const ITensor *_input0; + const ITensor *_input1; + ITensor *_output; +}; +} +#endif /* __ARM_COMPUTE_NELOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h b/arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h new file mode 100644 index 0000000000..5d49901dd0 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEMAGNITUDEPHASEKERNEL_H__ +#define __ARM_COMPUTE_NEMAGNITUDEPHASEKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ITensor; + +/** Template interface for the kernel to compute magnitude and phase */ +template <MagnitudeType mag_type, PhaseType phase_type> +class NEMagnitudePhaseKernel : public INEKernel +{ +public: + /** Default constructor */ + NEMagnitudePhaseKernel(); + /** Destructor */ + ~NEMagnitudePhaseKernel() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMagnitudePhaseKernel(const NEMagnitudePhaseKernel &) = delete; + /** Default move constructor */ + NEMagnitudePhaseKernel(NEMagnitudePhaseKernel &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMagnitudePhaseKernel &operator=(const NEMagnitudePhaseKernel &) = delete; + /** Default move assignment operator */ + NEMagnitudePhaseKernel &operator=(NEMagnitudePhaseKernel &&) = default; + + /** Initialise the kernel's input, output. + * + * @note At least one of out1 or out2 must be set + * + * @param[in] gx Gradient X tensor. Data type supported: S16. + * @param[in] gy Gradient Y tensor. Data type supported: S16. + * @param[out] magnitude (Optional) The output tensor - Magnitude. Data type supported: S16. + * @param[out] phase (Optional) The output tensor - Phase. Data type supported: U8. 
+ */ + void configure(const ITensor *gx, const ITensor *gy, ITensor *magnitude, ITensor *phase); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + /** Function to perform magnitude on the given window + * + * @param[in] window Region on which to execute the kernel + */ + void magnitude(const Window &window); + /** Function to perform phase on the given window + * + * @param[in] window Region on which to execute the kernel + */ + void phase(const Window &window); + /** Function to perform magnitude and phase on the given window + * + * @param[in] window Region on which to execute the kernel + */ + void magnitude_phase(const Window &window); + +private: + /** Common signature for all the specialised MagnitudePhase functions + * + * @param[in] window Region on which to execute the kernel. + */ + using MagnitudePhaseFunctionPtr = void (NEMagnitudePhaseKernel::*)(const Window &window); + /** MagnitudePhase function to use for the particular formats passed to configure() */ + MagnitudePhaseFunctionPtr _func; + const ITensor *_gx; /**< Input gradient X */ + const ITensor *_gy; /**< Input gradient Y */ + ITensor *_magnitude; /**< Output - Magnitude */ + ITensor *_phase; /**< Output - Phase */ +}; + +#ifdef ARM_COMPUTE_ENABLE_FP16 +/** Template interface for the kernel to compute magnitude and phase */ +template +class NEMagnitudePhaseFP16Kernel : public INEKernel +{ +public: + /** Default constructor */ + NEMagnitudePhaseFP16Kernel(); + /** Destructor */ + ~NEMagnitudePhaseFP16Kernel() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMagnitudePhaseFP16Kernel(const NEMagnitudePhaseFP16Kernel &) = delete; + /** Default move constructor */ + NEMagnitudePhaseFP16Kernel(NEMagnitudePhaseFP16Kernel &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMagnitudePhaseFP16Kernel &operator=(const NEMagnitudePhaseFP16Kernel &) 
= delete; + /** Default move assignment operator */ + NEMagnitudePhaseFP16Kernel &operator=(NEMagnitudePhaseFP16Kernel &&) = default; + + /** Initialise the kernel's input, output. + * + * @note At least one of out1 or out2 must be set + * + * @param[in] gx Gradient X tensor. Data type supported: S16. + * @param[in] gy Gradient Y tensor. Data type supported: S16. + * @param[out] magnitude (Optional) The output tensor - Magnitude. Data type supported: S16. + * @param[out] phase (Optional) The output tensor - Phase. Data type supported: U8. + */ + void configure(const ITensor *gx, const ITensor *gy, ITensor *magnitude, ITensor *phase); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + /** Function to perform magnitude on the given window + * + * @param[in] window Region on which to execute the kernel + */ + void magnitude(const Window &window); + /** Function to perform phase on the given window + * + * @param[in] window Region on which to execute the kernel + */ + void phase(const Window &window); + /** Function to perform magnitude and phase on the given window + * + * @param[in] window Region on which to execute the kernel + */ + void magnitude_phase(const Window &window); + + /** Common signature for all the specialised MagnitudePhase functions + * + * @param[in] window Region on which to execute the kernel. 
+ */ + using MagnitudePhaseFunctionPtr = void (NEMagnitudePhaseFP16Kernel::*)(const Window &window); + /** MagnitudePhase function to use for the particular formats passed to configure() */ + MagnitudePhaseFunctionPtr _func; + const ITensor *_gx; /**< Input gradient X */ + const ITensor *_gy; /**< Input gradient Y */ + ITensor *_magnitude; /**< Output - Magnitude */ + ITensor *_phase; /**< Output - Phase */ +}; +#else +template <MagnitudeType mag_type, PhaseType phase_type> +using NEMagnitudePhaseFP16Kernel = NEMagnitudePhaseKernel<mag_type, phase_type>; +#endif +} +#endif /* __ARM_COMPUTE_NEMAGNITUDEPHASEKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h b/arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h new file mode 100644 index 0000000000..83407ccb7d --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEMEANSTDDEVKERNEL_H__ +#define __ARM_COMPUTE_NEMEANSTDDEVKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +#include <cstdint> +#include <mutex> + +namespace arm_compute +{ +class ITensor; +using IImage = ITensor; + +/** Interface for the kernel to calculate mean and standard deviation of input image pixels. */ +class NEMeanStdDevKernel : public INEKernel +{ +public: + /** Default constructor */ + NEMeanStdDevKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMeanStdDevKernel(const NEMeanStdDevKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMeanStdDevKernel &operator=(const NEMeanStdDevKernel &) = delete; + /** Allow instances of this class to be moved */ + NEMeanStdDevKernel(NEMeanStdDevKernel &&) = default; + /** Allow instances of this class to be moved */ + NEMeanStdDevKernel &operator=(NEMeanStdDevKernel &&) = default; + /** Default destructor */ + ~NEMeanStdDevKernel() = default; + + /** Initialise the kernel's input and outputs. + * + * @param[in] input Input image. Data type supported: U8. + * @param[out] mean Input average pixel value. + * @param[out] global_sum Keeps global sum of pixel values. + * @param[out] stddev (Optional) Output standard deviation of pixel values. + * @param[out] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values. 
+ */ + void configure(const IImage *input, float *mean, uint64_t *global_sum, float *stddev = nullptr, uint64_t *global_sum_squared = nullptr); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const IImage *_input; + float *_mean; + float *_stddev; + uint64_t *_global_sum; + uint64_t *_global_sum_squared; + std::mutex _mtx; +}; +} +#endif /* __ARM_COMPUTE_NEMEANSTDDEVKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h b/arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h new file mode 100644 index 0000000000..dee1aadfb9 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEMEDIAN3x3KERNEL_H__ +#define __ARM_COMPUTE_NEMEDIAN3x3KERNEL_H__ + +#include "arm_compute/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Kernel to perform a median filter on a tensor */ +class NEMedian3x3Kernel : public INESimpleKernel +{ +public: + /** Set the source, destination and border mode of the kernel + * + * @param[in] input Source tensor. Data type supported: U8 + * @param[out] output Destination tensor. Data type supported: U8 + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ITensor *input, ITensor *output, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; +}; +} +#endif /*__ARM_COMPUTE_NEMEDIAN3x3KERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h b/arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h new file mode 100644 index 0000000000..e405ea5ae4 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h @@ -0,0 +1,161 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEMINMAXLOCATIONKERNEL_H__ +#define __ARM_COMPUTE_NEMINMAXLOCATIONKERNEL_H__ + +#include "arm_compute/core/IArray.h" +#include "arm_compute/core/NEON/INEKernel.h" + +#include <cstdint> +#include <mutex> + +namespace arm_compute +{ +class ITensor; +using IImage = ITensor; + +/** Interface for the kernel to perform min max search on an image. */ +class NEMinMaxKernel : public INEKernel +{ +public: + /** Default constructor */ + NEMinMaxKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMinMaxKernel(const NEMinMaxKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMinMaxKernel &operator=(const NEMinMaxKernel &) = delete; + /** Allow instances of this class to be moved */ + NEMinMaxKernel(NEMinMaxKernel &&) = default; + /** Allow instances of this class to be moved */ + NEMinMaxKernel &operator=(NEMinMaxKernel &&) = default; + /** Default destructor */ + ~NEMinMaxKernel() = default; + + /** Initialise the kernel's input and outputs. + * + * @param[in] input Input Image. Data types supported: U8/S16. + * @param[out] min Minimum value of image. + * @param[out] max Maximum value of image. + */ + void configure(const IImage *input, int32_t *min, int32_t *max); + /** Resets global minimum and maximum. 
*/ + void reset(); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + /** Performs the min/max algorithm on U8 images on a given window. + * + * @param win The window to run the algorithm on. + */ + void minmax_U8(const Window &win); + /** Performs the min/max algorithm on S16 images on a given window. + * + * @param win The window to run the algorithm on. + */ + void minmax_S16(const Window &win); + /** Common signature for all the specialised MinMax functions + * + * @param[in] window Region on which to execute the kernel. + */ + using MinMaxFunction = void (NEMinMaxKernel::*)(const Window &window); + /** MinMax function to use for the particular image types passed to configure() */ + MinMaxFunction _func; + /** Helper to update min/max values **/ + template <typename T> + void update_min_max(T min, T max); + + const IImage *_input; /**< Input image. */ + int32_t *_min; /**< Minimum value. */ + int32_t *_max; /**< Maximum value. */ + int32_t _min_init; /**< Value to initialise global minimum value. */ + int32_t _max_init; /**< Value to initialise global maximum value. */ + std::mutex _mtx; /**< Mutex used for result reduction. */ +}; + +/** Interface for the kernel to find min max locations of an image. 
*/ +class NEMinMaxLocationKernel : public INEKernel +{ +public: + /** Default constructor */ + NEMinMaxLocationKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMinMaxLocationKernel(const NEMinMaxLocationKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMinMaxLocationKernel &operator=(const NEMinMaxLocationKernel &) = delete; + /** Allow instances of this class to be moved */ + NEMinMaxLocationKernel(NEMinMaxLocationKernel &&) = default; + /** Allow instances of this class to be moved */ + NEMinMaxLocationKernel &operator=(NEMinMaxLocationKernel &&) = default; + /** Default destructor */ + ~NEMinMaxLocationKernel() = default; + + /** Initialise the kernel's input and outputs. + * + * @param[in] input Input Image. Data types supported: U8 or S16. + * @param[out] min Minimum value of image. + * @param[out] max Maximum value of image. + * @param[out] min_loc Array of minimum value locations. + * @param[out] max_loc Array of maximum value locations. + * @param[out] min_count Number of minimum value encounters. + * @param[out] max_count Number of maximum value encounters. + */ + void configure(const IImage *input, int32_t *min, int32_t *max, + ICoordinates2DArray *min_loc = nullptr, ICoordinates2DArray *max_loc = nullptr, + uint32_t *min_count = nullptr, uint32_t *max_count = nullptr); + + // Inherited methods overridden: + void run(const Window &window) override; + bool is_parallelisable() const override; + +private: + /** Performs the min/max location algorithm on T type images on a given window. + * + * @param win The window to run the algorithm on. + */ + template + void minmax_loc(const Window &win); + /** Common signature for all the specialised MinMaxLoc functions + * + * @param[in] window Region on which to execute the kernel. 
+ */ + using MinMaxLocFunction = void (NEMinMaxLocationKernel::*)(const Window &window); + /** MinMaxLoc function to use for the particular image types passed to configure() */ + MinMaxLocFunction _func; + /** Helper to create a function pointer table for the parameterized MinMaxLocation functions. */ + template + struct create_func_table; + + const IImage *_input; /**< Input image. */ + int32_t *_min; /**< Minimum value. */ + int32_t *_max; /**< Maximum value. */ + uint32_t *_min_count; /**< Count of minimum value encounters. */ + uint32_t *_max_count; /**< Count of maximum value encounters. */ + ICoordinates2DArray *_min_loc; /**< Locations of minimum values. */ + ICoordinates2DArray *_max_loc; /**< Locations of maximum values. */ + unsigned int _num_elems_processed_per_iteration; /**< Elements processed per iteration. */ +}; +} +#endif /*__ARM_COMPUTE_NEMINMAXLOCATIONKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NENonLinearFilterKernel.h b/arm_compute/core/NEON/kernels/NENonLinearFilterKernel.h new file mode 100644 index 0000000000..ede0294a73 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NENonLinearFilterKernel.h @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NENONLINEARFILTERKERNEL_H__ +#define __ARM_COMPUTE_NENONLINEARFILTERKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/Types.h" + +#include <cstdint> + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to apply a non-linear filter */ +class NENonLinearFilterKernel : public INEKernel +{ +public: + /** Default constructor */ + NENonLinearFilterKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NENonLinearFilterKernel(NENonLinearFilterKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NENonLinearFilterKernel &operator=(NENonLinearFilterKernel &) = delete; + /** Allow instances of this class to be moved */ + NENonLinearFilterKernel(NENonLinearFilterKernel &&) = default; + /** Allow instances of this class to be moved */ + NENonLinearFilterKernel &operator=(NENonLinearFilterKernel &&) = default; + /** Set the source, destination and border mode of the kernel + * + * @param[in] input Source tensor. Data type supported: U8 + * @param[out] output Destination tensor. Data type supported: U8 + * @param[in] function Non linear function to perform + * @param[in] mask_size Mask size. Supported sizes: 3, 5 + * @param[in] pattern Mask pattern + * @param[in] mask The given mask. Will be used only if pattern is specified to PATTERN_OTHER + * @param[in] border_undefined True if the border mode is undefined. 
False if it's replicate or constant. + */ + void configure(const ITensor *input, ITensor *output, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + /** Fill mask with the corresponding given pattern. + * + * @param[in,out] mask Mask to be filled according to pattern + * @param[in] cols Columns (width) of mask + * @param[in] rows Rows (height) of mask + * @param[in] pattern Pattern to fill the mask according to + */ + void fill_mask(uint8_t *mask, int cols, int rows, MatrixPattern pattern); + /** Apply a median filter when given mask pattern is defined as box. + * + * @param[in] win Window to apply the filter on. + */ + template + void median_filter_box(const Window &win); + /** Apply a min filter when given mask pattern is defined as box. + * + * @param[in] win Window to apply the filter on. + */ + template + void min_filter_box(const Window &win); + /** Apply a max filter when given mask pattern is defined as box. + * + * @param[in] win Window to apply the filter on. + */ + template + void max_filter_box(const Window &win); + /** Apply a median filter when given mask pattern is defined as cross. + * + * @param[in] win Window to apply the filter on. + */ + template + void median_filter_cross(const Window &win); + /** Apply a min filter when given mask pattern is defined as cross. + * + * @param[in] win Window to apply the filter on. + */ + template + void min_filter_cross(const Window &win); + /** Apply a max filter when given mask pattern is defined as cross. + * + * @param[in] win Window to apply the filter on. + */ + template + void max_filter_cross(const Window &win); + /** Apply a median filter when given mask pattern is defined as disk. + * + * @param[in] win Window to apply the filter on. 
+ */ + template + void median_filter_disk(const Window &win); + /** Apply a min filter when given mask pattern is defined as disk. + * + * @param[in] win Window to apply the filter on. + */ + template + void min_filter_disk(const Window &win); + /** Apply a max filter when given mask pattern is defined as disk. + * + * @param[in] win Window to apply the filter on. + */ + template + void max_filter_disk(const Window &win); + /** Apply a non-linear filter when given mask has user-defined pattern. + * + * @param[in] win Window to apply the filter on. + */ + template + void non_linear_filter_generic(const Window &win); + +private: + unsigned int _border_width; + const ITensor *_input; + ITensor *_output; + const uint8_t *_mask; + MatrixPattern _pattern; + NonLinearFilterFunction _function; + unsigned int _func_idx; + BorderSize _border_size; +}; +} +#endif /*__ARM_COMPUTE_NENONLINEARFILTERKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h b/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h new file mode 100644 index 0000000000..0daae59e54 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NENONMAXIMASUPPRESSION3x3KERNEL_H__ +#define __ARM_COMPUTE_NENONMAXIMASUPPRESSION3x3KERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +#include <cstdint> + +namespace arm_compute +{ +class ITensor; + +/** Interface to perform Non-Maxima suppression over a 3x3 window using NEON + * + * @note Used by @ref NEFastCorners and @ref NEHarrisCorners + */ +class NENonMaximaSuppression3x3Kernel : public INEKernel +{ +public: + /** Default constructor */ + NENonMaximaSuppression3x3Kernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NENonMaximaSuppression3x3Kernel(const NENonMaximaSuppression3x3Kernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NENonMaximaSuppression3x3Kernel &operator=(const NENonMaximaSuppression3x3Kernel &) = delete; + /** Allow instances of this class to be moved */ + NENonMaximaSuppression3x3Kernel(NENonMaximaSuppression3x3Kernel &&) = default; + /** Allow instances of this class to be moved */ + NENonMaximaSuppression3x3Kernel &operator=(NENonMaximaSuppression3x3Kernel &&) = default; + /** Default destructor */ + ~NENonMaximaSuppression3x3Kernel() = default; + + /** Initialise the kernel's sources, destinations and border mode. + * + * @param[in] input Source tensor. Data types supported: U8/F32 + * @param[out] output Destination tensor. 
Data types supported: same as @p input + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ITensor *input, ITensor *output, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +protected: + /** Common signature for all the specialised non-maxima suppression 3x3 functions + * + * @param[in] input_ptr Pointer to the input tensor. + * @param[out] output_ptr Pointer to the output tensor + * @param[in] input_stride Stride of the input tensor + */ + using NonMaxSuppr3x3Function = void(const void *__restrict input_ptr, void *__restrict output_ptr, const uint32_t input_stride); + + NonMaxSuppr3x3Function *_func; /**< Non-Maxima suppression function to use for the particular tensor types passed to configure() */ + const ITensor *_input; /**< Source tensor */ + ITensor *_output; /**< Destination tensor */ +}; + +#ifdef ARM_COMPUTE_ENABLE_FP16 +/** NEON kernel to perform Non-Maxima suppression 3x3 with intermediate results in F16 if the input data type is F32 + */ +class NENonMaximaSuppression3x3FP16Kernel : public NENonMaximaSuppression3x3Kernel +{ +public: + /** Initialise the kernel's sources, destinations and border mode. + * + * @param[in] input Source tensor. Data types supported: U8/F32. + * @param[out] output Destination tensor. Data types supported: same as @p input + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */ + void configure(const ITensor *input, ITensor *output, bool border_undefined); +}; +#else +using NENonMaximaSuppression3x3FP16Kernel = NENonMaximaSuppression3x3Kernel; +#endif +} +#endif /* _ARM_COMPUTE_NENONMAXIMASUPPRESSION3x3KERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h b/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h new file mode 100644 index 0000000000..d4e36d5ff1 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NENORMALIZATIONLAYERKERNEL_H__ +#define __ARM_COMPUTE_NENORMALIZATIONLAYERKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the normalization layer kernel. 
+ */ +class NENormalizationLayerKernel : public INEKernel +{ +public: + /** Default constructor */ + NENormalizationLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NENormalizationLayerKernel(const NENormalizationLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NENormalizationLayerKernel &operator=(const NENormalizationLayerKernel &) = delete; + /** Default Move Constructor. */ + NENormalizationLayerKernel(NENormalizationLayerKernel &&) = default; + /** Default move assignment operator. */ + NENormalizationLayerKernel &operator=(NENormalizationLayerKernel &&) = default; + /** Default destructor */ + ~NENormalizationLayerKernel() = default; + /** Set the input and output tensors. + * + * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], + * and an optional 4th dimension for batch of inputs. Data types supported: QS8/F32. + * @param[in] input_squared Source with each element has been squared. 3 lower dims represent a single input with dimensions [width, height, IFM], + * Data type supported: same as @p input + * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input + * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters. + */ + void configure(const ITensor *input, const ITensor *input_squared, ITensor *output, NormalizationLayerInfo norm_info); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + /** Function to perform normalization depending on the given template + * dimension. The second template parameter specifies whether the + * normalization has to be 1D or 2D. 
+ * + * @note Only supported normalizations are: + * - 1D over X or Z + * - 2D over X and Y + * + * @param[in] window Region on which to execute the kernel. + */ + template + void normalize(const Window &window); + + /** Function to perform normalization for fixed-point values depending on + * the given template dimension. The second template parameter specifies + * whether the normalization has to be 1D or 2D. + * + * @note Only supported normalizations are: + * - 1D over X or Z + * - 2D over X and Y + * + * @param[in] window Region on which to execute the kernel. + */ + template + void normalize_fixed_point(const Window &window); + /** Common signature for all the specialised normalization functions + * + * @param[in] window Region on which to execute the kernel. + */ + using NormalizationFunction = void (NENormalizationLayerKernel::*)(const Window &window); + +private: + NormalizationFunction _func; + const ITensor *_input; + const ITensor *_input_squared; + ITensor *_output; + NormalizationLayerInfo _norm_info; + BorderSize _border_size; +}; +} +#endif /*__ARM_COMPUTE_NENORMALIZATIONLAYERKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h b/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h new file mode 100644 index 0000000000..7e402cd220 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+#ifndef __ARM_COMPUTE_NEPIXELWISEMULTIPLICATIONKERNEL_H__
+#define __ARM_COMPUTE_NEPIXELWISEMULTIPLICATIONKERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the kernel to perform multiplication between two tensors */
+class NEPixelWiseMultiplicationKernel : public INEKernel
+{
+public:
+    /** Default constructor */
+    NEPixelWiseMultiplicationKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEPixelWiseMultiplicationKernel(const NEPixelWiseMultiplicationKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEPixelWiseMultiplicationKernel &operator=(const NEPixelWiseMultiplicationKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    NEPixelWiseMultiplicationKernel(NEPixelWiseMultiplicationKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    NEPixelWiseMultiplicationKernel &operator=(NEPixelWiseMultiplicationKernel &&) = default;
+    /** Default destructor */
+    ~NEPixelWiseMultiplicationKernel() = default;
+    /** Initialise the kernel's input, output and border mode.
+     *
+     * @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported.
+     *       For all other scale values only round to zero (implemented as round towards minus infinity) is supported.
+     *
+     * @param[in]  input1          An input tensor. Data types supported: U8/QS8/S16/F32.
+     * @param[in]  input2          An input tensor. Data types supported: U8/QS8/S16/F32.
+     * @param[out] output          The output tensor. Data types supported: U8 (Only if both inputs are U8) /S16/F32.
+     * @param[in]  scale           Scale to apply after multiplication.
+     *                             Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
+     * @param[in]  overflow_policy Overflow policy.
+     * @param[in]  rounding_policy Rounding policy.
+ */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + /** Common signature for all the specialised multiplication functions with integer scaling factor + * + * @param[in] input1_ptr Pointer to the first input tensor. + * @param[in] input2_ptr Pointer to the second input tensor. + * @param[out] output_ptr Pointer to the output tensor. + */ + using MulFunctionInt = void(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, int scale); + /** Common signature for all the specialised multiplication functions with fixed-point values + * + * @param[in] input1_ptr Pointer to the first input tensor. + * @param[in] input2_ptr Pointer to the second input tensor. + * @param[in] scale Scaling factor. + * @param[in] fixed_point_position Fixed-point position that expresses the number of bits for the fractional part of the number. + * @param[out] output_ptr Pointer to the output tensor. + */ + using MulFunctionQInt = void(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, int scale, int fixed_point_position); + /** Common signature for all the specialised multiplication functions with float scaling factor + * + * @param[in] input1_ptr Pointer to the first input tensor. + * @param[in] input2_ptr Pointer to the second input tensor. + * @param[out] output_ptr Pointer to the output tensor. 
+ */ + using MulFunctionFloat = void(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, float scale); + + MulFunctionFloat *_func_float; + MulFunctionInt *_func_int; + MulFunctionQInt *_func_q_int; + +private: + const ITensor *_input1; + const ITensor *_input2; + ITensor *_output; + float _scale; + int _scale_exponent; +}; +} +#endif /*__ARM_COMPUTE_NEPIXELWISEMULTIPLICATIONKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h new file mode 100644 index 0000000000..62a087841a --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEPOOLINGLAYERKERNEL_H__ +#define __ARM_COMPUTE_NEPOOLINGLAYERKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the pooling layer kernel */ +class NEPoolingLayerKernel : public INEKernel +{ +public: + /** Default constructor */ + NEPoolingLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPoolingLayerKernel(const NEPoolingLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPoolingLayerKernel &operator=(const NEPoolingLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + NEPoolingLayerKernel(NEPoolingLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + NEPoolingLayerKernel &operator=(NEPoolingLayerKernel &&) = default; + /** Default destructor */ + ~NEPoolingLayerKernel() = default; + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: QS8/F32. + * @param[out] output Destination tensor. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. + */ + void configure(const ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + /** Function to perform 2x2 pooling. + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + */ + template + void pooling2_f32(const Window &window_input, const Window &window); + /** Function to perform 2x2 pooling for 8bit fixed point. + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. 
+ */ + template + void pooling2_q8(const Window &window_input, const Window &window); + /** Function to perform 3x3 pooling. + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + */ + template + void pooling3_f32(const Window &window_input, const Window &window); + /** Function to perform 3x3 pooling for 8bit fixed point. + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + */ + template + void pooling3_q8(const Window &window_input, const Window &window); + /** Common signature for all the specialised Pooling functions + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + */ + using PoolingFunction = void (NEPoolingLayerKernel::*)(const Window &window_input, const Window &window); + +private: + PoolingFunction _func; + const ITensor *_input; + ITensor *_output; + PoolingLayerInfo _pool_info; + int _num_elems_processed_per_iteration; + BorderSize _border_size; +}; +} +#endif /*__ARM_COMPUTE_NEPOOLINGLAYERKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NERemapKernel.h b/arm_compute/core/NEON/kernels/NERemapKernel.h new file mode 100644 index 0000000000..f9eae68ee8 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NERemapKernel.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEREMAPKERNEL_H__ +#define __ARM_COMPUTE_NEREMAPKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform a remap on a tensor */ +class NERemapKernel : public INEKernel +{ +public: + /** Default constructor */ + NERemapKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NERemapKernel(const NERemapKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NERemapKernel &operator=(const NERemapKernel &) = delete; + /** Allow instances of this class to be moved */ + NERemapKernel(NERemapKernel &&) = default; + /** Allow instances of this class to be moved */ + NERemapKernel &operator=(NERemapKernel &&) = default; + /** Default destructor */ + ~NERemapKernel() = default; + + /** Initialize the kernel's input, output and border mode. + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[in] map_x Map for X coordinates. Data type supported: F32. + * @param[in] map_y Map for Y coordinates. Data type supported: F32. + * @param[out] output Destination tensor. Data types supported: U8. All but the lowest two dimensions must be the same size as in the input tensor, i.e. remapping is only performed within the XY-plane. + * @param[in] policy The interpolation type. 
+ */ + void configure(const ITensor *input, const ITensor *map_x, const ITensor *map_y, ITensor *output, InterpolationPolicy policy); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + /** function to perform nearest interpolation on the given window */ + void remap_nearest(const Window &window); + /** function to perform bilinear interpolation on the given window */ + void remap_bilinear(const Window &window); + /** Remap function to use for the particular interpolation type passed to configure() */ + void (NERemapKernel::*_func)(const Window &window); + + const ITensor *_input; /**< Input image */ + ITensor *_output; /**< Output image */ + const ITensor *_map_x; /**< Input remap x coordinates */ + const ITensor *_map_y; /**< Input remap y coordinates */ +}; +} +#endif /*__ARM_COMPUTE_NEREMAPKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEScaleKernel.h b/arm_compute/core/NEON/kernels/NEScaleKernel.h new file mode 100644 index 0000000000..03e26520b5 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEScaleKernel.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NESCALEKERNEL_H__ +#define __ARM_COMPUTE_NESCALEKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform scaling on a tensor */ +class NEScaleKernel : public INEKernel +{ +public: + /** Default constructor */ + NEScaleKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEScaleKernel(const NEScaleKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEScaleKernel &operator=(const NEScaleKernel &) = delete; + /** Allow instances of this class to be moved */ + NEScaleKernel(NEScaleKernel &&) = default; + /** Allow instances of this class to be moved */ + NEScaleKernel &operator=(NEScaleKernel &&) = default; + /** Default destructor */ + ~NEScaleKernel() = default; + + /** Initialise the kernel's inputs, output and interpolation policy + * + * @note dx, dy and offsets have the same dimensions (width and height) of the output tensor + * + * @param[in] input Source tensor. Data types supported: U8/S16. + * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer. Data type supported: F32 + * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer. 
Data type supported: F32 + * @param[in] offsets Offset to access the pixel with NEAREST interpolation or the top-left pixel with BILINEAR interpolation in the input tensor. Data type supported: S32. + * @param[out] output Destination tensor. Data types supported: U8/S16. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. + * @param[in] policy Interpolation type to use + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ITensor *input, const ITensor *dx, const ITensor *dy, const ITensor *offsets, ITensor *output, InterpolationPolicy policy, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + /** function to perform scale using nearest interpolation on the given window */ + void scale_nearest(const Window &window); + /** function to perform scale using bilinear interpolation on the given window */ + void scale_bilinear(const Window &window); + /** function to perform scale using area interpolation on the given window + * + * @note Used only in case down-sampling. + */ + void scale_area(const Window &window); + /** Scale function to use for the particular interpolation type passed to configure() */ + void (NEScaleKernel::*_func)(const Window &window); + + const ITensor *_offsets; + const ITensor *_dx; + const ITensor *_dy; + const ITensor *_input; + ITensor *_output; +}; +} +#endif /*__ARM_COMPUTE_NESCALEKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEScharr3x3Kernel.h b/arm_compute/core/NEON/kernels/NEScharr3x3Kernel.h new file mode 100644 index 0000000000..c618456d49 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEScharr3x3Kernel.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NESCHARR3x3KERNEL_H__ +#define __ARM_COMPUTE_NESCHARR3x3KERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to run a 3x3 Scharr filter on a tensor. 
+ * +* @f[ +* \mathbf{G}_x=\begin{vmatrix} +* -3 & 0 & +3\\ +* -10& 0 & +10\\ +* -3 & 0 & +3 +* \end{vmatrix} +* @f] +*/ +class NEScharr3x3Kernel : public INEKernel +{ +public: + /** Default constructor */ + NEScharr3x3Kernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEScharr3x3Kernel(const NEScharr3x3Kernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEScharr3x3Kernel &operator=(const NEScharr3x3Kernel &) = delete; + /** Allow instances of this class to be moved */ + NEScharr3x3Kernel(NEScharr3x3Kernel &&) = default; + /** Allow instances of this class to be moved */ + NEScharr3x3Kernel &operator=(NEScharr3x3Kernel &&) = default; + /** Default destructor */ + ~NEScharr3x3Kernel() = default; + + /** Initialise the kernel's source, destination and border. + * + * @note At least one of output_x or output_y must be set. + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S16. + * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + bool _run_scharr_x; /**< Do we need to run Scharr X ? */ + bool _run_scharr_y; /**< Do we need to run Scharr Y ? 
*/ + const ITensor *_input; /**< Input tensor */ + ITensor *_output_x; /**< Output tensor for scharr X */ + ITensor *_output_y; /**< Output tensor for scharr Y */ +}; +} +#endif /*__ARM_COMPUTE_NESCHARR3x3KERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NESobel3x3Kernel.h b/arm_compute/core/NEON/kernels/NESobel3x3Kernel.h new file mode 100644 index 0000000000..246dd83573 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NESobel3x3Kernel.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NESOBEL3x3KERNEL_H__ +#define __ARM_COMPUTE_NESOBEL3x3KERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to run a 3x3 Sobel X filter on a tensor. 
+ * + * @f[ + * \mathbf{G}_x=\begin{vmatrix} + * -1 & 0 & +1\\ + * -2 & 0 & +2\\ + * -1 & 0 & +1 + * \end{vmatrix} + * @f] +*/ +class NESobel3x3Kernel : public INEKernel +{ +public: + /** Default constructor */ + NESobel3x3Kernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESobel3x3Kernel(const NESobel3x3Kernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESobel3x3Kernel &operator=(const NESobel3x3Kernel &) = delete; + /** Allow instances of this class to be moved */ + NESobel3x3Kernel(NESobel3x3Kernel &&) = default; + /** Allow instances of this class to be moved */ + NESobel3x3Kernel &operator=(NESobel3x3Kernel &&) = default; + /** Default destructor */ + ~NESobel3x3Kernel() = default; + + /** Initialise the kernel's source, destination and border mode. + * + * @note At least one of output_x or output_y must be set. + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S16. + * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + bool _run_sobel_x; /**< Do we need to run Sobel X ? */ + bool _run_sobel_y; /**< Do we need to run Sobel Y ? 
*/ + const ITensor *_input; /**< Input tensor */ + ITensor *_output_x; /**< Output tensor for sobel X */ + ITensor *_output_y; /**< Output tensor for sobel Y */ +}; +} +#endif /*__ARM_COMPUTE_NESOBEL3x3KERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NESobel5x5Kernel.h b/arm_compute/core/NEON/kernels/NESobel5x5Kernel.h new file mode 100644 index 0000000000..49c1c41e6d --- /dev/null +++ b/arm_compute/core/NEON/kernels/NESobel5x5Kernel.h @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NESOBEL5x5KERNEL_H__ +#define __ARM_COMPUTE_NESOBEL5x5KERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to run the horizontal pass of 5x5 Sobel filter on a tensor. 
+ * + */ +class NESobel5x5HorKernel : public INEKernel +{ +public: + /** Default constructor */ + NESobel5x5HorKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESobel5x5HorKernel(const NESobel5x5HorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESobel5x5HorKernel &operator=(const NESobel5x5HorKernel &) = delete; + /** Allow instances of this class to be moved */ + NESobel5x5HorKernel(NESobel5x5HorKernel &&) = default; + /** Allow instances of this class to be moved */ + NESobel5x5HorKernel &operator=(NESobel5x5HorKernel &&) = default; + /** Default destructor */ + ~NESobel5x5HorKernel() = default; + + /** Initialise the kernel's source, destination and border mode. + * + * @note At least one of output_x or output_y must be set + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S16. + * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + const ITensor *_input; /**< Input tensor */ + ITensor *_output_x; /**< X output of horizontal pass */ + ITensor *_output_y; /**< Y output of horizontal pass */ + bool _run_sobel_x; /**< Do we need to run Sobel X? */ + bool _run_sobel_y; /**< Do we need to run Sobel Y? */ + BorderSize _border_size; /**< Border size */ +}; + +/** Interface for the kernel to run the vertical pass of 5x5 Sobel Y filter on a tensor. 
+ * +*/ +class NESobel5x5VertKernel : public INEKernel +{ +public: + /** Default constructor */ + NESobel5x5VertKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESobel5x5VertKernel(const NESobel5x5VertKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESobel5x5VertKernel &operator=(const NESobel5x5VertKernel &) = delete; + /** Allow instances of this class to be moved */ + NESobel5x5VertKernel(NESobel5x5VertKernel &&) = default; + /** Allow instances of this class to be moved */ + NESobel5x5VertKernel &operator=(NESobel5x5VertKernel &&) = default; + /** Default destructor */ + ~NESobel5x5VertKernel() = default; + + /** Initialise the kernel's source, destination and border mode. + * + * @param[in] input_x Input for X (X output of hor pass). Data type supported: S16. + * @param[in] input_y Input for Y (Y output of hor pass). Data type supported: S16. + * @param[out] output_x Destination tensor for the X gradient. Data type supported: S16. + * @param[out] output_y Destination tensor for the Y gradient. Data type supported: S16. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(ITensor *input_x, ITensor *input_y, ITensor *output_x, ITensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + ITensor *_input_x; /**< X input (X output of the hor pass) */ + ITensor *_input_y; /**< Y input (Y output of the hor pass) */ + ITensor *_output_x; /**< X output of sobel */ + ITensor *_output_y; /**< Y output of sobel */ + bool _run_sobel_x; /**< Do we need to run sobel X? */ + bool _run_sobel_y; /**< Do we need to run sobel Y? 
*/ +}; +} +#endif /*__ARM_COMPUTE_NESOBEL5x5KERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NESobel7x7Kernel.h b/arm_compute/core/NEON/kernels/NESobel7x7Kernel.h new file mode 100644 index 0000000000..4bff8596b8 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NESobel7x7Kernel.h @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NESOBEL7x7KERNEL_H__ +#define __ARM_COMPUTE_NESOBEL7x7KERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the kernel to run the horizontal pass of 7x7 Sobel filter on a tensor. 
+ * + */ +class NESobel7x7HorKernel : public INEKernel +{ +public: + /** Default constructor */ + NESobel7x7HorKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESobel7x7HorKernel(const NESobel7x7HorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESobel7x7HorKernel &operator=(const NESobel7x7HorKernel &) = delete; + /** Allow instances of this class to be moved */ + NESobel7x7HorKernel(NESobel7x7HorKernel &&) = default; + /** Allow instances of this class to be moved */ + NESobel7x7HorKernel &operator=(NESobel7x7HorKernel &&) = default; + /** Default destructor */ + ~NESobel7x7HorKernel() = default; + + /** Initialise the kernel's source, destination and border mode. + * + * @note At least one of output_x or output_y must be set. + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S32. + * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S32. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + */ + void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + const ITensor *_input; /**< Input tensor */ + ITensor *_output_x; /**< X output of horizontal pass */ + ITensor *_output_y; /**< Y output of horizontal pass */ + bool _run_sobel_x; /**< Do we need to run Sobel X? */ + bool _run_sobel_y; /**< Do we need to run Sobel Y? */ + BorderSize _border_size; /**< Border size */ +}; + +/** Interface for the kernel to run the vertical pass of 7x7 Sobel filter on a tensor. 
+ * +*/ +class NESobel7x7VertKernel : public INEKernel +{ +public: + /** Default constructor */ + NESobel7x7VertKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESobel7x7VertKernel(const NESobel7x7VertKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NESobel7x7VertKernel &operator=(const NESobel7x7VertKernel &) = delete; + /** Allow instances of this class to be moved */ + NESobel7x7VertKernel(NESobel7x7VertKernel &&) = default; + /** Allow instances of this class to be moved */ + NESobel7x7VertKernel &operator=(NESobel7x7VertKernel &&) = default; + /** Default destructor */ + ~NESobel7x7VertKernel() = default; + + /** Initialise the kernel's source, destination and border mode. + * + * @note At least one of output_x or output_y must be set + * @note If output_x is set then input_x must be set too + * @note If output_y is set then input_y must be set too + * + * @param[in] input_x (Optional) Input for X (X output of hor pass). Data type supported: S32. + * @param[in] input_y (Optional) Input for Y (Y output of hor pass). Data type supported: S32. + * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S32. + * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S32. + * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
+ */ + void configure(const ITensor *input_x, const ITensor *input_y, ITensor *output_x, ITensor *output_y, bool border_undefined); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + const ITensor *_input_x; /**< X input (X output of the hor pass) */ + const ITensor *_input_y; /**< Y input (Y output of the hor pass) */ + ITensor *_output_x; /**< X output of sobel */ + ITensor *_output_y; /**< Y output of sobel */ + bool _run_sobel_x; /**< Do we need to run sobel X? */ + bool _run_sobel_y; /**< Do we need to run sobel Y? */ +}; +} +#endif /*__ARM_COMPUTE_NESOBEL7x7KERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h b/arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h new file mode 100644 index 0000000000..ab626ad5ec --- /dev/null +++ b/arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NESOFTMAXLAYERKERNEL_H__ +#define __ARM_COMPUTE_NESOFTMAXLAYERKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the identifying the max value of 1D Logits */ +class NELogits1DMaxKernel : public INESimpleKernel +{ +public: + /** Default constructor */ + NELogits1DMaxKernel(); + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: QS8, F32. + * @param[out] output Destination tensor. Data types supported: same as @p input + */ + void configure(const ITensor *input, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + using Logits1DMaxFunction = void(const ITensor *in, ITensor *out, const Window &window); + +private: + Logits1DMaxFunction *_func; + BorderSize _border_size; +}; + +/** Interface for shifting the logits values around the max value and exponentiating the result */ +class NELogits1DShiftExpSumKernel : public INEKernel +{ +public: + /** Default constructor */ + NELogits1DShiftExpSumKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELogits1DShiftExpSumKernel(const NELogits1DShiftExpSumKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELogits1DShiftExpSumKernel &operator=(const NELogits1DShiftExpSumKernel &) = delete; + /** Allow instances of this class to be moved */ + NELogits1DShiftExpSumKernel(NELogits1DShiftExpSumKernel &&) = default; + /** Allow instances of this 
class to be moved */ + NELogits1DShiftExpSumKernel &operator=(NELogits1DShiftExpSumKernel &&) = default; + /** Default destructor */ + ~NELogits1DShiftExpSumKernel() = default; + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: QS8, F32. + * @param[in] max Max values tensor. Data types supported: same as @p input. + * @param[out] output Destination tensor. Data types supported: same as @p input. + * @param[out] sum Sum of 1D logits tensor. Data types supported: same as @p input. + */ + void configure(const ITensor *input, const ITensor *max, ITensor *output, ITensor *sum); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + using Logits1DShiftExpSumFunction = void(const ITensor *in, const ITensor *max, ITensor *out, ITensor *sum, const Window &window); + +private: + Logits1DShiftExpSumFunction *_func; + const ITensor *_input; + const ITensor *_max; + ITensor *_output; + ITensor *_sum; +}; + +/** Interface for calculating the final step of the Softmax Layer where each logit value is multiplied by the inverse of the sum of the logits. */ +class NELogits1DNormKernel : public INEKernel +{ +public: + /** Default constructor */ + NELogits1DNormKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELogits1DNormKernel(const NELogits1DNormKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELogits1DNormKernel &operator=(const NELogits1DNormKernel &) = delete; + /** Allow instances of this class to be moved */ + NELogits1DNormKernel(NELogits1DNormKernel &&) = default; + /** Allow instances of this class to be moved */ + NELogits1DNormKernel &operator=(NELogits1DNormKernel &&) = default; + /** Default destructor */ + ~NELogits1DNormKernel() = default; + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: QS8, F32. 
+ * @param[in] sum Sum tensor. The number of dimensions should be dim(input)-1. Data types supported: same as @p input. + * @param[out] output Destination tensor. Data types supported: same as @p input. + */ + void configure(const ITensor *input, const ITensor *sum, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + using Logits1DNormFunction = void(const ITensor *in, const ITensor *sum, ITensor *out, const Window &window); + +private: + Logits1DNormFunction *_func; + const ITensor *_input; + const ITensor *_sum; + ITensor *_output; +}; +} +#endif /*__ARM_COMPUTE_NESOFTMAXLAYERKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NETableLookupKernel.h b/arm_compute/core/NEON/kernels/NETableLookupKernel.h new file mode 100644 index 0000000000..b3963e5a75 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NETableLookupKernel.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NETABLELOOKUPKERNEL_H__ +#define __ARM_COMPUTE_NETABLELOOKUPKERNEL_H__ + +#include "arm_compute/core/NEON/INESimpleKernel.h" + +namespace arm_compute +{ +class ITensor; +class ILut; + +/** Interface for the kernel to perform table lookup calculations. */ +class NETableLookupKernel : public INESimpleKernel +{ +public: + /** Default constructor */ + NETableLookupKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NETableLookupKernel(const NETableLookupKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NETableLookupKernel &operator=(const NETableLookupKernel &) = delete; + /** Allow instances of this class to be moved */ + NETableLookupKernel(NETableLookupKernel &&) = default; + /** Allow instances of this class to be moved */ + NETableLookupKernel &operator=(NETableLookupKernel &&) = default; + /** Initialise the kernel's input, lut and output. + * + * @param[in] input An input tensor. Data types supported: U8/S16. + * @param[in] lut The input LUT. + * @param[out] output The output tensor. Data types supported: same as @p input + */ + void configure(const ITensor *input, const ILut *lut, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + /** Perform table lookup on a given window. + * + * @param window window Region on which to execute the kernel. + */ + template + void tableLookup(const Window &window); + /** Common signature for all the specialised lut functions + * + * @param[in] window Region on which to execute the kernel. 
+ */ + using TableLookupFunction = void (NETableLookupKernel::*)(const Window &window); + /** Sub function to use for the particular tensor types passed to configure() */ + TableLookupFunction _func; + const ILut *_lut; +}; +} +#endif /* __ARM_COMPUTE_NETABLELOOKUPKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEThresholdKernel.h b/arm_compute/core/NEON/kernels/NEThresholdKernel.h new file mode 100644 index 0000000000..778176293f --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEThresholdKernel.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NETHRESHOLDKERNEL_H__ +#define __ARM_COMPUTE_NETHRESHOLDKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/Types.h" + +#include + +namespace arm_compute +{ +class ITensor; + +/** Interface for the thresholding kernel + * + */ +class NEThresholdKernel : public INEKernel +{ +public: + /** Constructor + * Initialize all the pointers to nullptr and parameters to zero. + */ + NEThresholdKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEThresholdKernel(const NEThresholdKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEThresholdKernel &operator=(const NEThresholdKernel &) = delete; + /** Initialise the kernel's input, output and threshold parameters. + * + * @param[in] input An input tensor. Data type supported: U8 + * @param[out] output The output tensor. Data type supported: U8. + * @param[in] threshold Threshold. When the threshold type is RANGE, this is used as the lower threshold. + * @param[in] false_value value to set when the condition is not respected. + * @param[in] true_value value to set when the condition is respected. + * @param[in] type Thresholding type. Either RANGE or BINARY. + * @param[in] upper Upper threshold. Only used when the thresholding type is RANGE. 
+ */ + void configure(const ITensor *input, ITensor *output, uint8_t threshold, uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + /** run binary thresholding on the given window */ + void run_binary(const Window &window); + /** run range thresholding on the given window */ + void run_range(const Window &window); + + void (NEThresholdKernel::*_func)(const Window &window); + + const ITensor *_input; /**< Input */ + ITensor *_output; /**< Output */ + uint8_t _threshold; + uint8_t _false_value; + uint8_t _true_value; + uint8_t _upper; +}; +} +#endif /*__ARM_COMPUTE_NETHRESHOLDKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NETransposeKernel.h b/arm_compute/core/NEON/kernels/NETransposeKernel.h new file mode 100644 index 0000000000..ac9449ff92 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NETransposeKernel.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NETRANSPOSEKERNEL_H__ +#define __ARM_COMPUTE_NETRANSPOSEKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel which transposes the elements of a matrix. + * + * [width, height, batch] -> [height, width, batch] + * + */ +class NETransposeKernel : public INEKernel +{ +public: + /** Default constructor */ + NETransposeKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NETransposeKernel(const NETransposeKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NETransposeKernel &operator=(const NETransposeKernel &) = delete; + /** Allow instances of this class to be moved */ + NETransposeKernel(NETransposeKernel &&) = default; + /** Allow instances of this class to be moved */ + NETransposeKernel &operator=(NETransposeKernel &&) = default; + /** Default destructor */ + ~NETransposeKernel() = default; + + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data types supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32 + * @param[out] output Output tensor. Data type supported: Same as @p input + */ + void configure(const ITensor *input, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + /** Common signature for all the transpose functions + * + * @param[in] input An input tensor. Data types supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32 + * @param[out] output The output tensor. Data type supported: same as @p input + * @param[in] window Region on which to execute the kernel. 
+ */ + using TransposeFunction = void(const ITensor *input, ITensor *output, const Window &window); + /** Transpose function to use for the particular tensor types passed to configure() */ + TransposeFunction *_func; + const ITensor *_input; + ITensor *_output; +}; +} +#endif /* __ARM_COMPUTE_NETRANSPOSEKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEWarpKernel.h b/arm_compute/core/NEON/kernels/NEWarpKernel.h new file mode 100644 index 0000000000..10fed1d450 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEWarpKernel.h @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEWARPKERNEL_H__ +#define __ARM_COMPUTE_NEWARPKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/Types.h" + +#include + +namespace arm_compute +{ +class ITensor; + +/** Common interface for warp affine and warp perspective */ +class INEWarpKernel : public INEKernel +{ +public: + /** Default constructor */ + INEWarpKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + INEWarpKernel(const INEWarpKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + INEWarpKernel &operator=(const INEWarpKernel &) = delete; + /** Allow instances of this class to be moved */ + INEWarpKernel(INEWarpKernel &&) = default; + /** Allow instances of this class to be moved */ + INEWarpKernel &operator=(INEWarpKernel &&) = default; + /** Initialise the kernel's input, output and border mode. + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output Destination tensor. Data type supported: U8. + * @param[in] matrix The perspective or affine matrix to use. Must be 2x3 for affine and 3x3 for perspective of type float. + * @param[in] border_mode Strategy to use for borders + * @param[in] constant_border_value Constant value used for filling the border. 
+ */ + virtual void configure(const ITensor *input, ITensor *output, const float *matrix, BorderMode border_mode, uint8_t constant_border_value); + + // Inherited methods overridden: + void run(const Window &window) override; + +protected: + /** function to perform warp affine or warp perspective on the given window when border mode == UNDEFINED + * + * @param[in] window Region on which to execute the kernel + */ + virtual void warp_undefined(const Window &window) = 0; + /** function to perform warp affine or warp perspective on the given window when border mode == CONSTANT + * + * @param[in] window Region on which to execute the kernel + */ + virtual void warp_constant(const Window &window) = 0; + /** function to perform warp affine or warp perspective on the given window when border mode == REPLICATE + * + * @param[in] window Region on which to execute the kernel + */ + virtual void warp_replicate(const Window &window) = 0; + /** Common signature for all the specialised warp functions + * + * @param[in] window Region on which to execute the kernel. + */ + void (INEWarpKernel::*_func)(const Window &window); + + const ITensor *_input; /**< Input Tensor */ + ITensor *_output; /**< Output Tensor */ + uint8_t _constant_border_value; /**< Constant value used for filling the border. This value is used for those pixels out of the ROI when the border mode is CONSTANT */ + const float *_matrix; /**< The affine or perspective matrix. Must be 2x3 for warp affine or 3x3 for warp perspective of type float. 
*/ +}; + +/** Template interface for the kernel to compute warp affine + * + */ +template +class NEWarpAffineKernel : public INEWarpKernel +{ +private: + // Inherited methods overridden: + void warp_undefined(const Window &window) override; + void warp_constant(const Window &window) override; + void warp_replicate(const Window &window) override; +}; + +/** Template interface for the kernel to compute warp perspective + * + */ +template +class NEWarpPerspectiveKernel : public INEWarpKernel +{ +private: + // Inherited methods overridden: + void warp_undefined(const Window &window) override; + void warp_constant(const Window &window) override; + void warp_replicate(const Window &window) override; +}; +} +#endif /*__ARM_COMPUTE_NEWARPKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h b/arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h new file mode 100644 index 0000000000..cad2d00b1f --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEWEIGHTSRESHAPEKERNEL_H__ +#define __ARM_COMPUTE_NEWEIGHTSRESHAPEKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** NEON kernel to perform reshaping on the weights used by convolution and locally connected layer + * + * Rearranges each 3-dimensional kernel to a single row leading to a matrix with linearized kernels. + * In combination with the @ref NEIm2ColKernel can transform a convolution to a matrix multiplication. + * + * For example assuming a 3D weight kernel of 3x3 dimensions and depth of 2 we have: + * @f[ + * \left( \begin{array}{ccc} + * a000 & a001 & a002 \\ + * a010 & a011 & a012 \\ + * a020 & a021 & a022 \\ + * \end{array} \right) + * \left( \begin{array}{ccc} + * a100 & a101 & a102 \\ + * a110 & a111 & a112 \\ + * a120 & a121 & a122 \\ + * \end{array} \right) + * \rightarrow + * \left( \begin{array}{ccccccccc} + * a000 & a001 & a002 & a010 & a011 & a012 & a020 & a021 & a022 & a100 & a101 & a102 & a110 & a111 & a112 & a120 & a121 & a122 \\ + * \end{array} \right) + * @f] + */ +class NEWeightsReshapeKernel : public INEKernel +{ +public: + /** Constructor.*/ + NEWeightsReshapeKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEWeightsReshapeKernel(const NEWeightsReshapeKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEWeightsReshapeKernel &operator=(const NEWeightsReshapeKernel &) = delete; + /** Allow instances of this class to be moved */ + NEWeightsReshapeKernel(NEWeightsReshapeKernel &&) = default; + /** Allow instances of this class to be moved */ + NEWeightsReshapeKernel 
&operator=(NEWeightsReshapeKernel &&) = default; + /** Default destructor */ + ~NEWeightsReshapeKernel() = default; + /** Set the input and output of the kernel. + * + * @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared, + * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. Data types supported: QS8/F32 + * @param[in] bias The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with + * dimensions [OFM, num_patches] if unshared. Data types supported: Same as @p input + * @param[out] output The output tensor. Data types supported: Same as @p input + */ + void configure(const ITensor *input, const ITensor *bias, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + using WeightsReshapeKernel = void(const ITensor *input, const ITensor *bias, ITensor *output, const Window &window); + + WeightsReshapeKernel *_func; + const ITensor *_input; + const ITensor *_bias; + ITensor *_output; +}; +} + +#endif /*__ARM_COMPUTE_NEWEIGHTSRESHAPEKERNEL_H__ */ diff --git a/arm_compute/core/PixelValue.h b/arm_compute/core/PixelValue.h new file mode 100644 index 0000000000..b4912ce15a --- /dev/null +++ b/arm_compute/core/PixelValue.h @@ -0,0 +1,168 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_PIXELVALUE_H__ +#define __ARM_COMPUTE_PIXELVALUE_H__ + +#include + +namespace arm_compute +{ +/** Class describing the value of a pixel for any image format. */ +class PixelValue +{ +public: + /** Default constructor: value initialized to 0 */ + PixelValue() + : value{ { 0 } } + { + } + /** Initialize the union with a U8 pixel value + * + * @param[in] v U8 value. + */ + PixelValue(uint8_t v) + : PixelValue() + { + value.u8 = v; + } + /** Initialize the union with a U16 pixel value + * + * @param[in] v U16 value. + */ + PixelValue(uint16_t v) + : PixelValue() + { + value.u16 = v; + } + /** Initialize the union with a S16 pixel value + * + * @param[in] v S16 value. + */ + PixelValue(int16_t v) + : PixelValue() + { + value.s16 = v; + } + /** Initialize the union with a U32 pixel value + * + * @param[in] v U32 value. 
+ */ + PixelValue(uint32_t v) + : PixelValue() + { + value.u32 = v; + } + /** Initialize the union with a S32 pixel value + * + * @param[in] v S32 value. + */ + PixelValue(int32_t v) + : PixelValue() + { + value.s32 = v; + } + /** Initialize the union with a F32 pixel value + * + * @param[in] v F32 value. + */ + PixelValue(float v) + : PixelValue() + { + value.f32 = v; + } + /** Union which describes the value of a pixel for any image format. + * Use the field corresponding to the image format + */ + union + { + uint8_t rgb[3]; /**< 3 channels: RGB888 */ + uint8_t yuv[3]; /**< 3 channels: Any YUV format */ + uint8_t rgbx[4]; /**< 4 channels: RGBX8888 */ + float f32; /**< Single channel float 32 */ + uint8_t u8; /**< Single channel U8 */ + int8_t s8; /**< Single channel S8 */ + uint16_t u16; /**< Single channel U16 */ + int16_t s16; /**< Single channel S16 */ + uint32_t u32; /**< Single channel U32 */ + int32_t s32; /**< Single channel S32 */ + } value; + /** Interpret the pixel value as a U8 + * + * @param[out] v Returned value + */ + void get(uint8_t &v) const + { + v = value.u8; + } + /** Interpret the pixel value as a S8 + * + * @param[out] v Returned value + */ + void get(int8_t &v) const + { + v = value.s8; + } + /** Interpret the pixel value as a U16 + * + * @param[out] v Returned value + */ + void get(uint16_t &v) const + { + v = value.u16; + } + /** Interpret the pixel value as a S16 + * + * @param[out] v Returned value + */ + void get(int16_t &v) const + { + v = value.s16; + } + /** Interpret the pixel value as a U32 + * + * @param[out] v Returned value + */ + void get(uint32_t &v) const + { + v = value.u32; + } + /** Interpret the pixel value as a S32 + * + * @param[out] v Returned value + */ + void get(int32_t &v) const + { + v = value.s32; + } + /** Interpret the pixel value as a F32 + * + * @param[out] v Returned value + */ + void get(float &v) const + { + v = value.f32; + } +}; +} +#endif /* __ARM_COMPUTE_PIXELVALUE_H__ */ diff --git 
a/arm_compute/core/PyramidInfo.h b/arm_compute/core/PyramidInfo.h new file mode 100644 index 0000000000..76b3852bbf --- /dev/null +++ b/arm_compute/core/PyramidInfo.h @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_PYRAMIDINFO_H__ +#define __ARM_COMPUTE_PYRAMIDINFO_H__ + +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" + +#include + +namespace arm_compute +{ +/** Store the Pyramid's metadata */ +class PyramidInfo +{ +public: + /** Default constructor */ + PyramidInfo(); + /** Default destructor */ + virtual ~PyramidInfo() = default; + /** Allow instances of this class to be copy constructed */ + PyramidInfo(const PyramidInfo &) = default; + /** Allow instances of this class to be copied */ + PyramidInfo &operator=(const PyramidInfo &) = default; + /** Allow instances of this class to be move constructed */ + PyramidInfo(PyramidInfo &&) = default; + /** Allow instances of this class to be moved */ + PyramidInfo &operator=(PyramidInfo &&) = default; + + /** Create pyramid info for 2D tensors + * + * @param[in] num_levels The number of pyramid levels. This is required to be a non-zero value + * @param[in] scale Used to indicate the scale between the pyramid levels. + * This is required to be a non-zero positive value. + * @param[in] width The width of the 2D tensor at 0th pyramid level + * @param[in] height The height of the 2D tensor at 0th pyramid level + * @param[in] format The format of all 2D tensors in the pyramid + * NV12, NV21, IYUV, UYVY and YUYV formats are not supported. + */ + PyramidInfo(size_t num_levels, float scale, size_t width, size_t height, Format format); + + /** Create pyramid info using TensorShape + * + * @param[in] num_levels The number of pyramid levels. This is required to be a non-zero value + * @param[in] scale Used to indicate the scale between the pyramid levels. + * This is required to be a non-zero positive value. 
+ * @param[in] tensor_shape It specifies the size for each dimension of the tensor 0th pyramid level in number of elements + * @param[in] format The format of all tensors in the pyramid + */ + PyramidInfo(size_t num_levels, float scale, const TensorShape &tensor_shape, Format format); + + /** Initialize pyramid's metadata for 2D tensors + * + * @param[in] num_levels The number of pyramid levels. This is required to be a non-zero value + * @param[in] scale Used to indicate the scale between the pyramid levels. + * This is required to be a non-zero positive value. + * @param[in] width The width of the 2D tensor at 0th pyramid level + * @param[in] height The height of the 2D tensor at 0th pyramid level + * @param[in] format The format of all 2D tensors in the pyramid + * NV12, NV21, IYUV, UYVY and YUYV formats are not supported. + */ + void init(size_t num_levels, float scale, size_t width, size_t height, Format format); + /** Initialize pyramid's metadata using TensorShape + * + * @param[in] num_levels The number of pyramid levels. This is required to be a non-zero value + * @param[in] scale Used to indicate the scale between the pyramid levels. + * This is required to be a non-zero positive value. 
+ * @param[in] tensor_shape It specifies the size for each dimension of the tensor 0th pyramid level in number of elements + * @param[in] format The format of all tensors in the pyramid + */ + void init(size_t num_levels, float scale, const TensorShape &tensor_shape, Format format); + /** Return the number of the pyramid levels + * + * @return The number of the pyramid levels + */ + size_t num_levels() const; + /** Return the width of the 0th level tensor + * + * @return The width of the 0th level tensor + */ + size_t width() const; + /** Return the height of the 0th level tensor + * + * @return The height of the 0th level tensor + */ + size_t height() const; + /** Return the TensorShape of the 0th level tensor + * + * @return The TensorShape of the 0th level tensor + */ + const TensorShape &tensor_shape() const; + /** Return the image format of all tensors in the pyramid + * + * @return The image format + */ + Format format() const; + /** Return the scale factor of the pyramid + * + * @return The scale factor + */ + float scale() const; + +private: + size_t _num_levels; + TensorShape _tensor_shape; + Format _format; + float _scale; +}; +} +#endif /*__ARM_COMPUTE_PYRAMIDINFO_H__ */ diff --git a/arm_compute/core/Size2D.h b/arm_compute/core/Size2D.h new file mode 100644 index 0000000000..cb053ea2c4 --- /dev/null +++ b/arm_compute/core/Size2D.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_SIZE2D_H__ +#define __ARM_COMPUTE_SIZE2D_H__ + +#include + +namespace arm_compute +{ +/** Class for specifying the size of an image or rectangle */ +class Size2D +{ +public: + /** Default constructor */ + Size2D() + : width(0), height(0) + { + } + /** Constructor. Initializes "width" and "height" respectively with "w" and "h" + * + * @param[in] w Width of the image or rectangle + * @param[in] h Height of the image or rectangle + */ + Size2D(size_t w, size_t h) + : width(w), height(h) + { + } + /** Constructor. 
Initializes "width" and "height" with the dimensions of "size" + * + * @param[in] size Size data object + */ + Size2D(const Size2D &size) + : width(size.width), height(size.height) + { + } + /** Copy assignment + * + * @param[in] size Constant reference input "Size2D" data object to copy + * + * @return Reference to the newly altered left hand side "Size2D" data object + */ + Size2D &operator=(const Size2D &size) + { + width = size.width; + height = size.height; + return *this; + } + /** The area of the image or rectangle calculated as (width * height) + * + * @return Area (width * height) + * + */ + size_t area() const + { + return (width * height); + } + +public: + size_t width; /**< Width of the image region or rectangle */ + size_t height; /**< Height of the image region or rectangle */ +}; +} +#endif /*__ARM_COMPUTE_SIZE2D_H__ */ diff --git a/arm_compute/core/Steps.h b/arm_compute/core/Steps.h new file mode 100644 index 0000000000..33a88a2568 --- /dev/null +++ b/arm_compute/core/Steps.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_STEPS_H__ +#define __ARM_COMPUTE_STEPS_H__ + +#include "arm_compute/core/Dimensions.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Types.h" + +#include <algorithm> +#include <array> +#include <cstddef> + +namespace arm_compute +{ +/** Class to describe a number of elements in each dimension. Similar to @ref + * Strides but not in bytes but number of elements. + */ +class Steps : public Dimensions<unsigned int> +{ +public: + /** Constructor to initialize the steps. + * + * @param[in] steps Values to initialize the steps. + */ + template <typename... Ts> + Steps(Ts... steps) + : Dimensions{ steps... } + { + // Initialize empty dimensions to 1 + std::fill(_id.begin() + _num_dimensions, _id.end(), 1); + } + /** Allow instances of this class to be copy constructed */ + constexpr Steps(const Steps &) = default; + /** Allow instances of this class to be copied */ + Steps &operator=(const Steps &) = default; + /** Allow instances of this class to be move constructed */ + constexpr Steps(Steps &&) = default; + /** Allow instances of this class to be moved */ + Steps &operator=(Steps &&) = default; + /** Default destructor */ + ~Steps() = default; +}; +} +#endif /*__ARM_COMPUTE_STEPS_H__*/ diff --git a/arm_compute/core/Strides.h b/arm_compute/core/Strides.h new file mode 100644 index 0000000000..329fafb5f8 --- /dev/null +++ b/arm_compute/core/Strides.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_STRIDES_H__ +#define __ARM_COMPUTE_STRIDES_H__ + +#include "arm_compute/core/Dimensions.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Types.h" + +#include <algorithm> +#include <array> +#include <cstddef> + +namespace arm_compute +{ +/** Strides of an item in bytes */ +class Strides : public Dimensions<uint32_t> +{ +public: + /** Constructor to initialize the strides. + * + * @param[in] strides Values to initialize the strides. + */ + template <typename... Ts> + constexpr Strides(Ts... strides) + : Dimensions{ strides... 
} + { + } + /** Allow instances of this class to be copy constructed */ + constexpr Strides(const Strides &) = default; + /** Allow instances of this class to be copied */ + Strides &operator=(const Strides &) = default; + /** Allow instances of this class to be move constructed */ + constexpr Strides(Strides &&) = default; + /** Allow instances of this class to be moved */ + Strides &operator=(Strides &&) = default; + /** Default destructor */ + ~Strides() = default; +}; +} +#endif /*__ARM_COMPUTE_STRIDES_H__*/ diff --git a/arm_compute/core/SubTensorInfo.h b/arm_compute/core/SubTensorInfo.h new file mode 100644 index 0000000000..e2532fd487 --- /dev/null +++ b/arm_compute/core/SubTensorInfo.h @@ -0,0 +1,184 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_SUBTENSORINFO_H__ +#define __ARM_COMPUTE_SUBTENSORINFO_H__ + +#include "arm_compute/core/ITensorInfo.h" + +#include "arm_compute/core/Coordinates.h" +#include "arm_compute/core/Strides.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Validate.h" + +#include + +namespace arm_compute +{ +/** Store the sub tensor's metadata */ +class SubTensorInfo final : public ITensorInfo +{ +public: + /** Default constructor */ + SubTensorInfo(); + /** Default constructor + * + * @param[in] parent Metadata of parent tensor. + * @param[in] tensor_shape Tensor shape. Shape must fit inside parent's shape. + * X and Y dimensions must match the parent's ones. + * @param[in] coords Coordinates of starting element inside parent tensor. + */ + SubTensorInfo(ITensorInfo *parent, const TensorShape &tensor_shape, const Coordinates &coords); + /** Default destructor */ + ~SubTensorInfo() = default; + /** Allow instances of this class to be copy constructed */ + SubTensorInfo(const SubTensorInfo &) = default; + /** Allow instances of this class to be copied */ + SubTensorInfo &operator=(const SubTensorInfo &) = default; + /** Allow instances of this class to be move constructed */ + SubTensorInfo(SubTensorInfo &&) = default; + /** Allow instances of this class to be moved */ + SubTensorInfo &operator=(SubTensorInfo &&) = default; + + // Inherited methods overridden: + void set_data_type(DataType data_type) override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + _parent->set_data_type(data_type); + }; + void set_num_channels(int num_channels) override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + _parent->set_num_channels(num_channels); + }; + void set_format(Format format) override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + _parent->set_format(format); + }; + void set_fixed_point_position(int fixed_point_position) override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + 
_parent->set_fixed_point_position(fixed_point_position); + }; + void set_tensor_shape(TensorShape shape) override; + bool auto_padding() override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + return _parent->auto_padding(); + }; + bool extend_padding(const PaddingSize &padding) override; + size_t dimension(size_t index) const override + { + return _tensor_shape[index]; + } + const Strides &strides_in_bytes() const override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + return _parent->strides_in_bytes(); + } + size_t offset_first_element_in_bytes() const override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + return _parent->offset_element_in_bytes(_coords); + } + size_t offset_element_in_bytes(const Coordinates &pos) const override; + int fixed_point_position() const override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + return _parent->fixed_point_position(); + } + size_t element_size() const override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + return _parent->element_size(); + } + size_t num_dimensions() const override + { + return _tensor_shape.num_dimensions(); + } + size_t num_channels() const override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + return _parent->num_channels(); + } + const TensorShape &tensor_shape() const override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + return _tensor_shape; + } + DataType data_type() const override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + return _parent->data_type(); + } + Format format() const override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + return _parent->format(); + } + size_t total_size() const override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + return _parent->total_size(); + } + PaddingSize padding() const override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + return _parent->padding(); + } + bool has_padding() const override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + return _parent->has_padding(); + } + bool is_resizable() const override + { + 
ARM_COMPUTE_ERROR_ON(_parent == nullptr); + return _parent->is_resizable(); + } + void set_is_resizable(bool is_resizable) override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + _parent->set_is_resizable(is_resizable); + } + ValidRegion valid_region() const override + { + return _valid_region; + } + void set_valid_region(ValidRegion valid_region) override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + ARM_COMPUTE_ERROR_ON_INVALID_SUBTENSOR_VALID_REGION(_parent->valid_region(), valid_region); + _valid_region = std::move(valid_region); + } + +private: + ITensorInfo *_parent; + TensorShape _tensor_shape; + Coordinates _coords; + ValidRegion _valid_region; +}; +} +#endif /*__ARM_COMPUTE_SUBTENSORINFO_H__ */ diff --git a/arm_compute/core/TensorInfo.h b/arm_compute/core/TensorInfo.h new file mode 100644 index 0000000000..35b9ccb9ff --- /dev/null +++ b/arm_compute/core/TensorInfo.h @@ -0,0 +1,300 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_TENSORINFO_H__ +#define __ARM_COMPUTE_TENSORINFO_H__ + +#include "arm_compute/core/ITensorInfo.h" + +#include "arm_compute/core/Coordinates.h" +#include "arm_compute/core/Strides.h" +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Utils.h" + +#include + +namespace arm_compute +{ +class HOGInfo; + +/** Store the tensor's metadata */ +class TensorInfo final : public ITensorInfo +{ +public: + /** Default constructor */ + TensorInfo(); + /** Default destructor */ + ~TensorInfo() = default; + /** Allow instances of this class to be copy constructed */ + TensorInfo(const ITensorInfo &info); + /** Allow instances of this class to be copy constructed */ + TensorInfo(const TensorInfo &) = default; + /** Allow instances of this class to be copied */ + TensorInfo &operator=(const TensorInfo &) = default; + /** Allow instances of this class to be move constructed */ + TensorInfo(TensorInfo &&) = default; + /** Allow instances of this class to be moved */ + TensorInfo &operator=(TensorInfo &&) = default; + + /** Construct a tensor info with a format. + * + * Can be used for automatic derivation of the shape by the function. + * + * @param[in] format Format of the tensor. + */ + TensorInfo(Format format); + + /** 2D tensor constructor + * + * @param[in] width Width of the 2D tensor + * @param[in] height Height of the 2D tensor + * @param[in] format Single plane format of the tensor. + */ + TensorInfo(unsigned int width, unsigned int height, Format format); + /** Constructor + * + * @param[in] tensor_shape It specifies the size for each dimension of the tensor in number of elements. 
+ * @param[in] format Single plane format of the tensor. + */ + TensorInfo(const TensorShape &tensor_shape, Format format); + + /** Construct a tensor info with a data type and number of channels. + * + * Can be used for automatic derivation of the shape by the function. + * + * @param[in] num_channels It indicates the number of channels for each tensor element + * @param[in] data_type Data type to use for each tensor element + * @param[in] fixed_point_position (Optional) It specifies the fixed point position when the tensor data type is QS8, QS16 or QS32. + */ + TensorInfo(size_t num_channels, DataType data_type, size_t fixed_point_position = 0); + + /** Constructor + * + * @param[in] tensor_shape It specifies the size for each dimension of the tensor in number of elements. + * @param[in] num_channels It indicates the number of channels for each tensor element + * @param[in] data_type Data type to use for each tensor element + * @param[in] fixed_point_position (Optional) Fixed point position that expresses the number of bits for the fractional part of the number when the tensor's data type is QS8 or QS16. + */ + TensorInfo(const TensorShape &tensor_shape, size_t num_channels, DataType data_type, int fixed_point_position = 0); + /** Constructor + * + * @param[in] hog_info HOG's metadata used to allocate normalized HOG space + * @param[in] width Width of the 2D tensor where the HOG descriptor will be computed on + * @param[in] height Height of the 2D tensor where the HOG descriptor will be computed on + */ + TensorInfo(const HOGInfo &hog_info, unsigned int width, unsigned int height); + + /** Initialize the tensor info with just a format. + * + * Can be used for automatic derivation of the shape by the function. + * + * @param[in] format Single plane format of the tensor. + */ + void init(Format format); + + /** Initialize the metadata structure with the given parameters + * + * @param[in] tensor_shape Size for each dimension of the tensor in number of elements. 
+ * @param[in] format Single plane format of the tensor. + */ + void init(const TensorShape &tensor_shape, Format format); + /** Initialize the metadata structure with the given parameters + * + * @param[in] tensor_shape Size for each dimension of the tensor in number of elements. + * @param[in] format Single plane format of the tensor. + * @param[in] strides_in_bytes Stride in bytes for accessing each dimension of the tensor. + * @param[in] offset_first_element_in_bytes Offset in bytes from the beginning of memory allocation to access the first element. + * @param[in] total_size_in_bytes Size in bytes of the memory allocation (including the offset to the first element). + */ + void init(const TensorShape &tensor_shape, Format format, const Strides &strides_in_bytes, size_t offset_first_element_in_bytes, size_t total_size_in_bytes); + + /** Initialize the tensor info with just a format. + * + * Can be used for automatic derivation of the shape by the function. + * + * @param[in] num_channels Desired number of channels for each tensor element. + * @param[in] data_type Data type to use for each tensor element. + * @param[in] fixed_point_position (Optional) Fixed point position when the tensor data type is QS8, QS16 or QS32. + */ + void init(size_t num_channels, DataType data_type, size_t fixed_point_position = 0); + + /** Initialize the metadata structure with the given parameters + * + * @param[in] tensor_shape Size for each dimension of the tensor in number of elements. + * @param[in] num_channels Desired number of channels for each tensor element. + * @param[in] data_type Data type to use for each tensor element. + * @param[in] fixed_point_position (Optional) Fixed point position that expresses the number of bits for the fractional part of the number when the tensor's data type is QS8 or QS16. 
+ */ + void init(const TensorShape &tensor_shape, size_t num_channels, DataType data_type, int fixed_point_position = 0); + /** Initialize the metadata structure with the given parameters + * + * @param[in] tensor_shape Size for each dimension of the tensor in number of elements. + * @param[in] num_channels Desired number of channels for each tensor element. + * @param[in] data_type Data type to use for each tensor element. + * @param[in] strides_in_bytes Stride in bytes for accessing each dimension of the tensor. + * @param[in] offset_first_element_in_bytes Offset in bytes from the beginning of memory allocation to access the first element. + * @param[in] total_size_in_bytes Size in bytes of the memory allocation (including the offset to the first element). + * @param[in] fixed_point_position (Optional) Fixed point position that expresses the number of bits for the fractional part of the number when the tensor's data type is QS8 or QS16. + */ + void init(const TensorShape &tensor_shape, size_t num_channels, DataType data_type, const Strides &strides_in_bytes, size_t offset_first_element_in_bytes, + size_t total_size_in_bytes, int fixed_point_position = 0); + /** Initialize the metadata structure for the given HOG's metadata + * + * @param[in] hog_info HOG's metadata used to allocate normalized HOG space + * @param[in] width Width of the 2D tensor where the HOG descriptor will be computed on + * @param[in] height Height of the 2D tensor where the HOG descriptor will be computed on + */ + void init(const HOGInfo &hog_info, unsigned int width, unsigned int height); + /** Initialize the metadata structure for the given tensor shape and single-plane format, (Padding is automatically calculated) + * + * @note The padding used by this method is really conservative so that the tensor can be used for most functions. 
+ * + * @param[in] tensor_shape It specifies the size for each dimension of the tensor in number of elements + * @param[in] format Single plane format of the image. + * + * @return Total allocation size including padding in bytes. + */ + size_t init_auto_padding(const TensorShape &tensor_shape, Format format); + /** Initialize the metadata structure for the given tensor shape, number of channels, + * data type and fixed point position. (Padding is automatically calculated) + * + * @note The padding used by this method is really conservative so that the tensor can be used for most functions. + * + * @param[in] tensor_shape It specifies the size for each dimension of the tensor in number of elements + * @param[in] num_channels It indicates the number of channels for each tensor element + * @param[in] data_type Data type to use for each tensor element + * @param[in] fixed_point_position (Optional) Fixed point position that expresses the number of bits for the fractional part of the number when the tensor's data type is QS8 or QS16. + * + * @return Total allocation size including padding in bytes. + */ + size_t init_auto_padding(const TensorShape &tensor_shape, size_t num_channels, DataType data_type, int fixed_point_position = 0); + /** Initialize the metadata structure for the given HOG's metadata + * + * @note init_auto_padding will be used for the tensor initialization. 
+ * + * @param[in] hog_info HOG's metadata used to allocate normalized HOG space + * @param[in] width Width of the 2D tensor where the HOG descriptor will be computed on + * @param[in] height Height of the 2D tensor where the HOG descriptor will be computed on + */ + size_t init_auto_padding(const HOGInfo &hog_info, unsigned int width, unsigned int height); + + // Inherited methods overridden: + void set_data_type(DataType data_type) override; + void set_num_channels(int num_channels) override; + void set_format(Format format) override; + void set_tensor_shape(TensorShape shape) override; + void set_fixed_point_position(int fixed_point_position) override; + bool auto_padding() override; + bool extend_padding(const PaddingSize &padding) override; + size_t dimension(size_t index) const override + { + return _tensor_shape[index]; + } + const Strides &strides_in_bytes() const override + { + return _strides_in_bytes; + } + size_t offset_first_element_in_bytes() const override + { + return _offset_first_element_in_bytes; + } + size_t offset_element_in_bytes(const Coordinates &pos) const override; + int fixed_point_position() const override + { + return _fixed_point_position; + } + size_t element_size() const override + { + return data_size_from_type(_data_type) * _num_channels; + } + size_t num_dimensions() const override + { + return _tensor_shape.num_dimensions(); + } + size_t num_channels() const override + { + return _num_channels; + } + const TensorShape &tensor_shape() const override + { + return _tensor_shape; + } + DataType data_type() const override + { + return _data_type; + } + Format format() const override + { + return _format; + } + size_t total_size() const override + { + return _total_size; + } + PaddingSize padding() const override + { + return _padding; + } + bool has_padding() const override + { + return !_padding.empty(); + } + bool is_resizable() const override + { + return _is_resizable; + } + void set_is_resizable(bool is_resizable) override + { + 
_is_resizable = is_resizable; + } + ValidRegion valid_region() const override + { + return _valid_region; + } + void set_valid_region(ValidRegion valid_region) override + { + _valid_region = std::move(valid_region); + } + +private: + /** Calculates strides, offset and total size resulting from the specified padding around the XY plane. + * + * @param[in] padding Padding around the XY plane in elements. + */ + std::tuple calculate_padding_requirements(const PaddingSize &padding); + + size_t _total_size; + int _fixed_point_position; + size_t _offset_first_element_in_bytes; + Strides _strides_in_bytes; + size_t _num_channels; + TensorShape _tensor_shape; + DataType _data_type; + Format _format; + bool _is_resizable; + ValidRegion _valid_region; + PaddingSize _padding; +}; +} +#endif /*__ARM_COMPUTE_TENSORINFO_H__ */ diff --git a/arm_compute/core/TensorShape.h b/arm_compute/core/TensorShape.h new file mode 100644 index 0000000000..f8b3181686 --- /dev/null +++ b/arm_compute/core/TensorShape.h @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_TENSORSHAPE_H__ +#define __ARM_COMPUTE_TENSORSHAPE_H__ + +#include "arm_compute/core/Dimensions.h" +#include "arm_compute/core/Error.h" + +#include +#include +#include +#include + +namespace arm_compute +{ +/** Shape of a tensor */ +class TensorShape : public Dimensions +{ +public: + /** Constructor to initialize the tensor shape. + * + * @param[in] dims Values to initialize the dimensions. + */ + template + TensorShape(Ts... dims) + : Dimensions{ dims... } + { + // Initialize unspecified dimensions to 1 + if(_num_dimensions > 0) + { + std::fill(_id.begin() + _num_dimensions, _id.end(), 1); + } + + // Correct number dimensions to ignore trailing dimensions of size 1 + apply_dimension_correction(); + } + /** Allow instances of this class to be copy constructed */ + TensorShape(const TensorShape &) = default; + /** Allow instances of this class to be copied */ + TensorShape &operator=(const TensorShape &) = default; + /** Allow instances of this class to be move constructed */ + TensorShape(TensorShape &&) = default; + /** Allow instances of this class to be moved */ + TensorShape &operator=(TensorShape &&) = default; + /** Default destructor */ + ~TensorShape() = default; + + /** Accessor to set the value of one of the dimensions. + * + * @param[in] dimension Dimension for which the value is set. + * @param[in] value Value to be set for the dimension. 
+ */ + void set(size_t dimension, size_t value) + { + ARM_COMPUTE_ERROR_ON(value < 1); + + // Make sure all empty dimensions are filled with 1 + std::fill(_id.begin() + _num_dimensions, _id.end(), 1); + + // Set the specified dimension and increase the number of dimensions if + // necessary + Dimensions::set(dimension, value); + + // Correct number dimensions to ignore trailing dimensions of size 1 + apply_dimension_correction(); + } + + /** Collapse the first n dimensions. + * + * @param[in] first Dimensions into which the following @p n are collapsed. + * @param[in] n Number of dimensions to collapse into @p first. + */ + void collapse(size_t n, size_t first = 0) + { + Dimensions::collapse(n, first); + + // Make sure all empty dimensions are filled with 1 + std::fill(_id.begin() + _num_dimensions, _id.end(), 1); + } + + /** Collapses all dimensions to a single linear total size. + * + * @return The total tensor size in terms of elements. + */ + size_t total_size() const + { + return std::accumulate(_id.begin(), _id.end(), 1, std::multiplies()); + } + /** Collapses given dimension and above. + * + * @note Precondition: dimension < TensorShape::num_max_dimensions + * + * @param[in] dimension Size of the wanted dimension + * + * @return The linear size of the collapsed dimensions + */ + size_t total_size_upper(size_t dimension) const + { + return std::accumulate(_id.begin() + dimension, _id.end(), 1, std::multiplies()); + } + +private: + /** Remove trailing dimensions of size 1 from the reported number of dimensions. 
*/ + void apply_dimension_correction() + { + for(int i = static_cast(_num_dimensions) - 1; i >= 0; --i) + { + if(_id[i] == 1) + { + --_num_dimensions; + } + else + { + break; + } + } + } +}; +} +#endif /*__ARM_COMPUTE_TENSORSHAPE_H__*/ diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h new file mode 100644 index 0000000000..725567b9ae --- /dev/null +++ b/arm_compute/core/Types.h @@ -0,0 +1,636 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_TYPES_H__ +#define __ARM_COMPUTE_TYPES_H__ + +#include "arm_compute/core/Coordinates.h" +#include "arm_compute/core/TensorShape.h" + +#include +#include +#include +#include + +namespace arm_compute +{ +/** Image colour formats */ +enum class Format +{ + UNKNOWN, /** Unknown image format */ + U8, /** 1 channel, 1 U8 per channel */ + S16, /** 1 channel, 1 S16 per channel */ + U16, /** 1 channel, 1 U16 per channel */ + S32, /** 1 channel, 1 S32 per channel */ + U32, /** 1 channel, 1 U32 per channel */ + F16, /** 1 channel, 1 F16 per channel */ + F32, /** 1 channel, 1 F32 per channel */ + UV88, /** 2 channel, 1 U8 per channel */ + RGB888, /** 3 channels, 1 U8 per channel */ + RGBA8888, /** 4 channels, 1 U8 per channel */ + YUV444, /** A 3 plane of 8 bit 4:4:4 sampled Y, U, V planes */ + YUYV422, /** A single plane of 32-bit macro pixel of Y0, U0, Y1, V0 bytes */ + NV12, /** A 2 plane YUV format of Luma (Y) and interleaved UV data at 4:2:0 sampling */ + NV21, /** A 2 plane YUV format of Luma (Y) and interleaved VU data at 4:2:0 sampling */ + IYUV, /** A 3 plane of 8-bit 4:2:0 sampled Y, U, V planes */ + UYVY422 /** A single plane of 32-bit macro pixel of U0, Y0, V0, Y1 byte */ +}; + +/** Available data types */ +enum class DataType +{ + UNKNOWN, + U8, + S8, + QS8, + U16, + S16, + QS16, + U32, + S32, + U64, + S64, + F16, + F32, + F64, + SIZET +}; + +/** Constant value of the border pixels when using BorderMode::CONSTANT */ +constexpr uint8_t CONSTANT_BORDER_VALUE = 199; + +/* Constant value used to indicate a half-scale pyramid */ +constexpr float SCALE_PYRAMID_HALF = 0.5f; + +/* Constant value used to indicate a ORB scaled pyramid */ +constexpr float SCALE_PYRAMID_ORB = 8.408964152537146130583778358414e-01; + +struct ValidRegion +{ + ValidRegion() + : anchor{}, shape{} + { + } + + ValidRegion(const ValidRegion &) = default; + ValidRegion(ValidRegion &&) = default; + ValidRegion &operator=(const ValidRegion &) = default; + ValidRegion 
&operator=(ValidRegion &&) = default; + ~ValidRegion() = default; + + ValidRegion(Coordinates anchor, TensorShape shape) + : anchor{ anchor }, shape{ shape } + { + } + + /** Return the start of the valid region for the given dimension @p d */ + int start(unsigned int d) const + { + return anchor[d]; + } + + /** Return the end of the valid region for the given dimension @p d */ + int end(unsigned int d) const + { + return anchor[d] + shape[d]; + } + + Coordinates anchor; + TensorShape shape; +}; + +/** Methods available to handle borders */ +enum class BorderMode +{ + UNDEFINED, /**< Borders are left undefined */ + CONSTANT, /**< Pixels outside the image are assumed to have a constant value */ + REPLICATE /**< Pixels outside the image are assumed to have the same value as the closest image pixel */ +}; + +/** Container for 2D border size */ +struct BorderSize +{ + /** Empty border, i.e. no border */ + constexpr BorderSize() + : top{ 0 }, right{ 0 }, bottom{ 0 }, left{ 0 } + { + } + + /** Border with equal size around the 2D plane */ + constexpr BorderSize(unsigned int size) + : top{ size }, right{ size }, bottom{ size }, left{ size } + { + } + + /** Border with same size for top/bottom and left/right */ + constexpr BorderSize(unsigned int top_bottom, unsigned int left_right) + : top{ top_bottom }, right{ left_right }, bottom{ top_bottom }, left{ left_right } + { + } + + /** Border with different sizes */ + constexpr BorderSize(unsigned int top, unsigned int right, unsigned int bottom, unsigned int left) + : top{ top }, right{ right }, bottom{ bottom }, left{ left } + { + } + + /** Check if the entire border is zero */ + constexpr bool empty() const + { + return top == 0 && right == 0 && bottom == 0 && left == 0; + } + + /** Check if the border is the same size on all sides */ + constexpr bool uniform() const + { + return top == right && top == bottom && top == left; + } + + BorderSize &operator*=(float scale) + { + top *= scale; + right *= scale; + bottom *= scale; 
+ left *= scale; + + return *this; + } + + BorderSize operator*(float scale) + { + BorderSize size = *this; + size *= scale; + + return size; + } + + void limit(const BorderSize &limit) + { + top = std::min(top, limit.top); + right = std::min(right, limit.right); + bottom = std::min(bottom, limit.bottom); + left = std::min(left, limit.left); + } + + unsigned int top; + unsigned int right; + unsigned int bottom; + unsigned int left; +}; + +using PaddingSize = BorderSize; + +/** Policy to handle overflow */ +enum class ConvertPolicy +{ + WRAP, /**< Wrap around */ + SATURATE /**< Saturate */ +}; + +/** Interpolation method */ +enum class InterpolationPolicy +{ + NEAREST_NEIGHBOR, /**< Output values are defined to match the source pixel whose center is nearest to the sample position */ + BILINEAR, /**< Output values are defined by bilinear interpolation between the pixels */ + AREA, /**< Output values are determined by averaging the source pixels whose areas fall under the area of the destination pixel, projected onto the source image */ +}; + +/** Bilinear Interpolation method used by LKTracker */ +enum class BilinearInterpolation +{ + BILINEAR_OLD_NEW, + BILINEAR_SCHARR +}; + +/** Threshold mode */ +enum class ThresholdType +{ + BINARY, /**< Threshold with one value */ + RANGE /**< Threshold with two values*/ +}; + +/** Rounding method */ +enum class RoundingPolicy +{ + TO_ZERO, /**< Truncates the least significand values that are lost in operations. */ + TO_NEAREST_UP, /**< Rounds to nearest value; half rounds up */ + TO_NEAREST_EVEN /**< Rounds to nearest value; half rounds to nearest even */ +}; + +/** Termination criteria */ +enum class Termination +{ + TERM_CRITERIA_EPSILON, + TERM_CRITERIA_ITERATIONS, + TERM_CRITERIA_BOTH +}; + +/** Magnitude calculation type. */ +enum class MagnitudeType +{ + L1NORM, /**< L1 normalization type */ + L2NORM /**< L2 normalization type */ +}; + +/** Phase calculation type. 
+ * + * @note When PhaseType == SIGNED, each angle is mapped to the range 0 to 255 inclusive otherwise angles between 0 and 180 + */ +enum class PhaseType +{ + SIGNED, /**< Angle range: [0, 360] */ + UNSIGNED /**< Angle range: [0, 180] */ +}; + +/** Keypoint type */ +struct KeyPoint +{ + int32_t x{ 0 }; /**< X coordinates */ + int32_t y{ 0 }; /**< Y coordinates */ + float strength{ 0.f }; /**< Strength of the point */ + float scale{ 0.f }; /**< Scale initialized to 0 by the corner detector */ + float orientation{ 0.f }; /**< Orientation initialized to 0 by the corner detector */ + int32_t tracking_status{ 0 }; /**< Status initialized to 1 by the corner detector, set to 0 when the point is lost */ + float error{ 0.f }; /**< Tracking error initialized to 0 by the corner detector */ +}; + +using InternalKeypoint = std::tuple; /* x,y,strength */ + +/** Rectangle type */ +struct Rectangle +{ + uint16_t x; /**< Top-left x coordinate */ + uint16_t y; /**< Top-left y coordinate */ + uint16_t width; /**< Width of the rectangle */ + uint16_t height; /**< Height of the rectangle */ +}; + +/** Coordinate type */ +struct Coordinates2D +{ + int32_t x; /**< X coordinates */ + int32_t y; /**< Y coordinates */ +}; + +/** Coordinate type */ +struct Coordinates3D +{ + uint32_t x; /**< X coordinates */ + uint32_t y; /**< Y coordinates */ + uint32_t z; /**< Z coordinates */ +}; + +/** Available channels */ +enum class Channel +{ + UNKNOWN, /** Unknown channel format */ + C0, /**< First channel (used by formats with unknown channel types). */ + C1, /**< Second channel (used by formats with unknown channel types). */ + C2, /**< Third channel (used by formats with unknown channel types). */ + C3, /**< Fourth channel (used by formats with unknown channel types). */ + R, /**< Red channel. */ + G, /**< Green channel. */ + B, /**< Blue channel. */ + A, /**< Alpha channel. */ + Y, /**< Luma channel. */ + U, /**< Cb/U channel. */ + V /**< Cr/V/Value channel. 
*/ +}; + +/** Available matrix patterns */ +enum class MatrixPattern +{ + BOX, /**< Box pattern matrix. */ + CROSS, /**< Cross pattern matrix. */ + DISK, /**< Disk pattern matrix. */ + OTHER /**< Any other matrix pattern. */ +}; + +/** Available non linear functions. */ +enum class NonLinearFilterFunction : unsigned +{ + MEDIAN = 0, /**< Non linear median filter. */ + MIN = 1, /**< Non linear erode. */ + MAX = 2, /**< Non linear dilate. */ +}; + +/** The normalization type used for the normalization layer */ +enum class NormType +{ + IN_MAP_1D, /**< Normalization applied within the same map in 1D region */ + IN_MAP_2D, /**< Normalization applied within the same map in 2D region */ + CROSS_MAP /**< Normalization applied cross maps */ +}; + +/** Normalization type for Histogram of Oriented Gradients (HOG) */ +enum class HOGNormType +{ + L2_NORM = 1, /**< L2-norm */ + L2HYS_NORM = 2, /**< L2-norm followed by clipping */ + L1_NORM = 3 /**< L1 norm */ +}; + +/** Detection window used for the object detection. 
The detection window keeps the following information: + * + * -# Geometry of the rectangular window (x/y of top-left corner and width/height) + * -# Index of the class used for evaluating which class the detection window belongs to + * -# Confidence value (score) obtained with the classifier + */ +struct DetectionWindow +{ + uint16_t x{ 0 }; /**< Top-left x coordinate */ + uint16_t y{ 0 }; /**< Top-left y coordinate */ + uint16_t width{ 0 }; /**< Width of the detection window */ + uint16_t height{ 0 }; /**< Height of the detection window */ + uint16_t idx_class{ 0 }; /**< Index of the class */ + float score{ 0.f }; /**< Confidence value for the detection window */ +}; + +/** Dimension rounding type when down-scaling on CNNs + * @note Used in pooling and convolution layer + */ +enum class DimensionRoundingType +{ + FLOOR, /**< Floor rounding */ + CEIL /**< Ceil rounding */ +}; + +/** Available pooling types */ +enum class PoolingType +{ + MAX, /**< Max Pooling */ + AVG /**< Average Pooling */ +}; + +/** Padding and stride information class */ +class PadStrideInfo +{ +public: + /** Constructor + * + * @param[in] stride_x (Optional) Stride, in elements, across x. Defaults to 1. + * @param[in] stride_y (Optional) Stride, in elements, across y. Defaults to 1. + * @param[in] pad_x (Optional) Padding, in elements, across x. Defaults to 0. + * @param[in] pad_y (Optional) Padding, in elements, across y. Defaults to 0. + * @param[in] round (Optional) Dimensions rounding. Defaults to @ref FLOOR. 
+ */ + PadStrideInfo(unsigned int stride_x = 1, unsigned int stride_y = 1, + unsigned int pad_x = 0, unsigned int pad_y = 0, + DimensionRoundingType round = DimensionRoundingType::FLOOR) + : _stride(std::make_pair(stride_x, stride_y)), + _pad(std::make_pair(pad_x, pad_y)), + _round_type(round) + { + } + std::pair stride() const + { + return _stride; + } + std::pair pad() const + { + return _pad; + } + DimensionRoundingType round() const + { + return _round_type; + } + +private: + std::pair _stride; + std::pair _pad; + DimensionRoundingType _round_type; +}; + +/** Pooling Layer Information class */ +class PoolingLayerInfo +{ +public: + /** Default Constructor + * + * @param[in] pool_type Pooling type @ref PoolingType. Defaults to @ref PoolingType::MAX + * @param[in] pool_size (Optional) Pooling size, in elements, across x and y. Defaults to 2. + * @param[in] pad_stride_info (Optional) Padding and stride information @ref PadStrideInfo + */ + PoolingLayerInfo(PoolingType pool_type = PoolingType::MAX, unsigned int pool_size = 2, PadStrideInfo pad_stride_info = PadStrideInfo()) + : _pool_type(pool_type), _pool_size(pool_size), _pad_stride_info(pad_stride_info) + { + } + PoolingType pool_type() const + { + return _pool_type; + } + unsigned int pool_size() const + { + return _pool_size; + } + PadStrideInfo pad_stride_info() const + { + return _pad_stride_info; + } + +private: + PoolingType _pool_type; + unsigned int _pool_size; + PadStrideInfo _pad_stride_info; +}; + +/** Activation Layer Information class */ +class ActivationLayerInfo +{ +public: + /** Available activation functions */ + enum class ActivationFunction + { + LOGISTIC, /**< Logistic */ + TANH, /**< Hyperbolic tangent */ + RELU, /**< Rectifier */ + BOUNDED_RELU, /**< Bounded Rectifier */ + SOFT_RELU, /**< Soft Rectifier */ + ABS, /**< Absolute */ + SQUARE, /**< Square */ + SQRT, /**< Square root */ + LINEAR /**< Linear */ + }; + + /** Default Constructor + * + * @param[in] f The activation function to use. 
+ * @param[in] a (Optional) The alpha parameter used by some activation functions + * (@ref ActivationFunction::BOUNDED_RELU, @ref ActivationFunction::LINEAR, @ref ActivationFunction::TANH). + * @param[in] b (Optional) The beta parameter used by some activation functions (@ref ActivationFunction::LINEAR, @ref ActivationFunction::TANH). + */ + ActivationLayerInfo(ActivationFunction f, float a = 0.0f, float b = 0.0f) + : _act(f), _a(a), _b(b) + { + } + ActivationFunction activation() const + { + return _act; + } + float a() const + { + return _a; + } + float b() const + { + return _b; + } + +private: + ActivationFunction _act; + float _a; + float _b; +}; + +/** Normalization Layer Information class */ +class NormalizationLayerInfo +{ +public: + /** Default Constructor + * + * @param[in] type The normalization type. Can be @ref NormType::IN_MAP_1D, @ref NormType::IN_MAP_2D or @ref NORM_TYPE::CROSS_MAP + * @param[in] norm_size The normalization size is the number of elements to normalize across. Defaults to 5. + * @param[in] alpha Alpha parameter used by normalization equation. Defaults to 0.0001. + * @param[in] beta Beta parameter used by normalization equation. Defaults to 0.5. + * @param[in] kappa Kappa parameter used by [Krichevksy 2012] Across Channel Local Brightness Normalization equation. + */ + NormalizationLayerInfo(NormType type, uint32_t norm_size = 5, float alpha = 0.0001f, float beta = 0.5f, float kappa = 1.f) + : _type(type), _norm_size(norm_size), _alpha(alpha), _beta(beta), _kappa(kappa) + { + } + NormType type() const + { + return _type; + } + uint32_t norm_size() const + { + return _norm_size; + } + float alpha() const + { + return _alpha; + } + float beta() const + { + return _beta; + } + float kappa() const + { + return _kappa; + } + /** Return the scaling factor of the normalization function. If kappa is not + * 1 then [Krichevksy 2012] normalization scaling is specified. 
Scaling + * factor takes into account the total number of elements used for the + * normalization, so in case of 2 dimensions this is _norm_size^2. + * + * @return The normalization scaling factor. + */ + float scale_coeff() const + { + const uint32_t size = (_type == NormType::IN_MAP_2D) ? _norm_size * _norm_size : _norm_size; + return (_kappa == 1.f) ? (_alpha / size) : _alpha; + } + +private: + NormType _type; + uint32_t _norm_size; + float _alpha; + float _beta; + float _kappa; +}; + +/** Convolution Layer Weights Information class */ +class WeightsInfo +{ +public: + WeightsInfo() + : _are_reshaped(false), _kernel_size(0) + { + } + /** Constructor + * + * @param[in] are_reshaped True if the weights have been reshaped + * @param[in] kernel_size The size of the kernel. + */ + WeightsInfo(bool are_reshaped, unsigned int kernel_size) + : _are_reshaped(are_reshaped), _kernel_size(kernel_size) + { + } + + bool are_reshaped() const + { + return _are_reshaped; + }; + unsigned int kernel_size() const + { + return _kernel_size; + } + +private: + const bool _are_reshaped; + const unsigned int _kernel_size; +}; + +/** IO formatting information class*/ +struct IOFormatInfo +{ + /** Precision type used when printing floating point numbers */ + enum class PrecisionType + { + Default, /**< Default precision to the one that the current stream has */ + Custom, /**< Custom precision specified by the user using the precision parameter */ + Full /**< The maximum precision of the floating point representation */ + }; + + /** Specifies the area to be printed, used by Tensor objects */ + enum class PrintRegion + { + ValidRegion, /**< Prints the valid region of the Tensor object */ + NoPadding, /**< Prints the Tensor object without the padding */ + Full /**< Print the tensor object including padding */ + }; + + IOFormatInfo(PrintRegion print_region = PrintRegion::ValidRegion, + PrecisionType precision_type = PrecisionType::Default, + unsigned int precision = 10, + bool align_columns = 
true, + std::string element_delim = " ", + std::string row_delim = "\n") + : print_region(print_region), + precision_type(precision_type), + precision(precision), + element_delim(element_delim), + row_delim(row_delim), + align_columns(align_columns) + { + } + + PrintRegion print_region; + PrecisionType precision_type; + unsigned int precision; + std::string element_delim; + std::string row_delim; + bool align_columns; +}; +} +#endif /* __ARM_COMPUTE_TYPES_H__ */ diff --git a/arm_compute/core/Utils.h b/arm_compute/core/Utils.h new file mode 100644 index 0000000000..9d3ff0a1bd --- /dev/null +++ b/arm_compute/core/Utils.h @@ -0,0 +1,740 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_UTILS_H__ +#define __ARM_COMPUTE_UTILS_H__ + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Types.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace arm_compute +{ +/** Computes the smallest number larger or equal to value that is a multiple of divisor. */ +template +inline auto ceil_to_multiple(S value, T divisor) -> decltype(((value + divisor - 1) / divisor) * divisor) +{ + ARM_COMPUTE_ERROR_ON(value < 0 || divisor <= 0); + return ((value + divisor - 1) / divisor) * divisor; +} + +/** Computes the largest number smaller or equal to value that is a multiple of divisor. */ +template +inline auto floor_to_multiple(S value, T divisor) -> decltype((value / divisor) * divisor) +{ + ARM_COMPUTE_ERROR_ON(value < 0 || divisor <= 0); + return (value / divisor) * divisor; +} + +/** Calculate the rounded up quotient of val / m. */ +template +constexpr auto DIV_CEIL(S val, T m) -> decltype((val + m - 1) / m) +{ + return (val + m - 1) / m; +} + +/** Returns the arm_compute library build information + * + * Contains the version number and the build options used to build the library + * + * @return The arm_compute library build information + */ +std::string build_information(); + +/** Load an entire file in memory + * + * @param[in] filename Name of the file to read. + * @param[in] binary Is it a binary file ? + * + * @return The content of the file. + */ +std::string read_file(const std::string &filename, bool binary); + +/** Return a value as a string + * + * @param[in] val Input value. 
+ * + * @return Value represented as a string + */ +template +const std::string val_to_string(T val) +{ + return static_cast(std::ostringstream() << val).str(); +} + +/** The size in bytes of the data type + * + * @param[in] data_type Input data type + * + * @return The size in bytes of the data type + */ +inline size_t data_size_from_type(DataType data_type) +{ + switch(data_type) + { + case DataType::U8: + case DataType::S8: + case DataType::QS8: + return 1; + case DataType::U16: + case DataType::S16: + case DataType::F16: + case DataType::QS16: + return 2; + case DataType::F32: + case DataType::U32: + case DataType::S32: + return 4; + case DataType::F64: + case DataType::U64: + case DataType::S64: + return 8; + case DataType::SIZET: + return sizeof(size_t); + default: + ARM_COMPUTE_ERROR("Invalid data type"); + return 0; + } +} + +/** The size in bytes of the pixel format + * + * @param[in] format Input format + * + * @return The size in bytes of the pixel format + */ +inline size_t pixel_size_from_format(Format format) +{ + switch(format) + { + case Format::U8: + return 1; + case Format::U16: + case Format::S16: + case Format::F16: + case Format::UV88: + case Format::YUYV422: + case Format::UYVY422: + return 2; + case Format::RGB888: + return 3; + case Format::RGBA8888: + return 4; + case Format::U32: + case Format::S32: + case Format::F32: + return 4; + //Doesn't make sense for planar formats: + case Format::NV12: + case Format::NV21: + case Format::IYUV: + case Format::YUV444: + default: + ARM_COMPUTE_ERROR("Undefined pixel size for given format"); + return 0; + } +} + +/** The size in bytes of the data type + * + * @param[in] dt Input data type + * + * @return The size in bytes of the data type + */ +inline size_t element_size_from_data_type(DataType dt) +{ + switch(dt) + { + case DataType::S8: + case DataType::U8: + case DataType::QS8: + return 1; + case DataType::U16: + case DataType::S16: + case DataType::QS16: + case DataType::F16: + return 2; + case 
DataType::U32: + case DataType::S32: + case DataType::F32: + return 4; + default: + ARM_COMPUTE_ERROR("Undefined element size for given data type"); + return 0; + } +} + +/** Return the data type used by a given single-planar pixel format + * + * @param[in] format Input format + * + * @return The size in bytes of the pixel format + */ +inline DataType data_type_from_format(Format format) +{ + switch(format) + { + case Format::U8: + case Format::UV88: + case Format::RGB888: + case Format::RGBA8888: + case Format::YUYV422: + case Format::UYVY422: + return DataType::U8; + case Format::U16: + return DataType::U16; + case Format::S16: + return DataType::S16; + case Format::U32: + return DataType::U32; + case Format::S32: + return DataType::S32; + case Format::F16: + return DataType::F16; + case Format::F32: + return DataType::F32; + //Doesn't make sense for planar formats: + case Format::NV12: + case Format::NV21: + case Format::IYUV: + case Format::YUV444: + default: + ARM_COMPUTE_ERROR("Not supported data_type for given format"); + return DataType::UNKNOWN; + } +} + +/** Return the plane index of a given channel given an input format. 
+ * + * @param[in] format Input format + * @param[in] channel Input channel + * + * @return The plane index of the specific channel of the specific format + */ +inline int plane_idx_from_channel(Format format, Channel channel) +{ + switch(format) + { + case Format::NV12: + case Format::NV21: + { + switch(channel) + { + case Channel::Y: + return 0; + case Channel::U: + case Channel::V: + return 1; + default: + ARM_COMPUTE_ERROR("Not supported channel"); + return 0; + } + } + case Format::IYUV: + case Format::YUV444: + { + switch(channel) + { + case Channel::Y: + return 0; + case Channel::U: + return 1; + case Channel::V: + return 2; + default: + ARM_COMPUTE_ERROR("Not supported channel"); + return 0; + } + } + default: + ARM_COMPUTE_ERROR("Not supported format"); + return 0; + } +} + +/** Return the number of planes for a given format + * + * @param[in] format Input format + * + * @return The number of planes for a given image format. + */ +inline size_t num_planes_from_format(Format format) +{ + switch(format) + { + case Format::U8: + case Format::S16: + case Format::U16: + case Format::S32: + case Format::U32: + case Format::F16: + case Format::F32: + case Format::RGB888: + case Format::RGBA8888: + case Format::YUYV422: + case Format::UYVY422: + return 1; + case Format::NV12: + case Format::NV21: + return 2; + case Format::IYUV: + case Format::YUV444: + return 3; + default: + ARM_COMPUTE_ERROR("Not supported format"); + return 0; + } +} + +/** Return the number of channels for a given single-planar pixel format + * + * @param[in] format Input format + * + * @return The number of channels for a given image format. 
+ */ +inline size_t num_channels_from_format(Format format) +{ + switch(format) + { + case Format::U8: + case Format::U16: + case Format::S16: + case Format::U32: + case Format::S32: + case Format::F16: + case Format::F32: + return 1; + // Because the U and V channels are subsampled + // these formats appear like having only 2 channels: + case Format::YUYV422: + case Format::UYVY422: + return 2; + case Format::UV88: + return 2; + case Format::RGB888: + return 3; + case Format::RGBA8888: + return 4; + //Doesn't make sense for planar formats: + case Format::NV12: + case Format::NV21: + case Format::IYUV: + case Format::YUV444: + default: + return 0; + } +} + +/** Separate a 2D convolution into two 1D convolutions +* +* @param[in] conv 2D convolution +* @param[out] conv_col 1D vertical convolution +* @param[out] conv_row 1D horizontal convolution +* @param[in] size Size of the 2D convolution +* +* @return true if the separation was successful +*/ +inline bool separate_matrix(const int16_t *conv, int16_t *conv_col, int16_t *conv_row, uint8_t size) +{ + int32_t min_col = -1; + int16_t min_col_val = -1; + + for(int32_t i = 0; i < size; ++i) + { + if(conv[i] != 0 && (min_col < 0 || abs(min_col_val) > abs(conv[i]))) + { + min_col = i; + min_col_val = conv[i]; + } + } + + if(min_col < 0) + { + return false; + } + + for(uint32_t j = 0; j < size; ++j) + { + conv_col[j] = conv[min_col + j * size]; + } + + for(uint32_t i = 0; i < size; i++) + { + if(static_cast(i) == min_col) + { + conv_row[i] = 1; + } + else + { + int16_t coeff = conv[i] / conv[min_col]; + + for(uint32_t j = 1; j < size; ++j) + { + if(conv[i + j * size] != (conv_col[j] * coeff)) + { + return false; + } + } + + conv_row[i] = coeff; + } + } + + return true; +} + +/** Calculate the scale of the given square matrix + * + * The scale is the absolute value of the sum of all the coefficients in the matrix. + * + * @note If the coefficients add up to 0 then the scale is set to 1. 
+ * + * @param[in] matrix Matrix coefficients + * @param[in] matrix_size Number of elements per side of the square matrix. (Number of coefficients = matrix_size * matrix_size). + * + * @return The absolute value of the sum of the coefficients if they don't add up to 0, otherwise 1. + */ +inline uint32_t calculate_matrix_scale(const int16_t *matrix, unsigned int matrix_size) +{ + const size_t size = matrix_size * matrix_size; + + return std::max(1, std::abs(std::accumulate(matrix, matrix + size, 0))); +} + +/** Calculate accurary required by the horizontal and vertical convolution computations + * + * @param[in] conv_col Pointer to the vertical vector of the separated convolution filter + * @param[in] conv_row Pointer to the horizontal vector of the convolution filter + * @param[in] size Number of elements per vector of the separated matrix + * + * @return The return type is a pair. The first element of the pair is the biggest data type needed for the first stage. The second + * element of the pair is the biggest data type needed for the second stage. + */ +inline std::pair data_type_for_convolution(const int16_t *conv_col, const int16_t *conv_row, size_t size) +{ + DataType first_stage = DataType::UNKNOWN; + DataType second_stage = DataType::UNKNOWN; + + auto gez = [](const int16_t &v) + { + return v >= 0; + }; + + auto accu_neg = [](const int &first, const int &second) + { + return first + (second < 0 ? second : 0); + }; + + auto accu_pos = [](const int &first, const int &second) + { + return first + (second > 0 ? second : 0); + }; + + const bool only_positive_coefficients = std::all_of(conv_row, conv_row + size, gez) && std::all_of(conv_col, conv_col + size, gez); + + if(only_positive_coefficients) + { + const int max_row_value = std::accumulate(conv_row, conv_row + size, 0) * UINT8_MAX; + const int max_value = std::accumulate(conv_col, conv_col + size, 0) * max_row_value; + + first_stage = (max_row_value <= UINT16_MAX) ? 
DataType::U16 : DataType::S32; + + second_stage = (max_value <= UINT16_MAX) ? DataType::U16 : DataType::S32; + } + else + { + const int min_row_value = std::accumulate(conv_row, conv_row + size, 0, accu_neg) * UINT8_MAX; + const int max_row_value = std::accumulate(conv_row, conv_row + size, 0, accu_pos) * UINT8_MAX; + const int neg_coeffs_sum = std::accumulate(conv_col, conv_col + size, 0, accu_neg); + const int pos_coeffs_sum = std::accumulate(conv_col, conv_col + size, 0, accu_pos); + const int min_value = neg_coeffs_sum * max_row_value + pos_coeffs_sum * min_row_value; + const int max_value = neg_coeffs_sum * min_row_value + pos_coeffs_sum * max_row_value; + + first_stage = ((INT16_MIN <= min_row_value) && (max_row_value <= INT16_MAX)) ? DataType::S16 : DataType::S32; + + second_stage = ((INT16_MIN <= min_value) && (max_value <= INT16_MAX)) ? DataType::S16 : DataType::S32; + } + + return std::make_pair(first_stage, second_stage); +} + +/** Calculate the accuracy required by the squared convolution calculation. + * + * + * @param[in] conv Pointer to the squared convolution matrix + * @param[in] size The total size of the convolution matrix + * + * @return The return is the biggest data type needed to do the convolution + */ +inline DataType data_type_for_convolution_matrix(const int16_t *conv, size_t size) +{ + auto gez = [](const int16_t v) + { + return v >= 0; + }; + + const bool only_positive_coefficients = std::all_of(conv, conv + size, gez); + + if(only_positive_coefficients) + { + const int max_conv_value = std::accumulate(conv, conv + size, 0) * UINT8_MAX; + if(max_conv_value <= UINT16_MAX) + { + return DataType::U16; + } + else + { + return DataType::S32; + } + } + else + { + const int min_value = std::accumulate(conv, conv + size, 0, [](int a, int b) + { + return b < 0 ? a + b : a; + }) + * UINT8_MAX; + + const int max_value = std::accumulate(conv, conv + size, 0, [](int a, int b) + { + return b > 0 ? 
a + b : a; + }) + * UINT8_MAX; + + if((INT16_MIN <= min_value) && (INT16_MAX >= max_value)) + { + return DataType::S16; + } + else + { + return DataType::S32; + } + } +} + +/** Returns expected width and height of output scaled tensor depending on dimensions rounding mode. + * + * @param[in] width Width of input tensor (Number of columns) + * @param[in] height Height of input tensor (Number of rows) + * @param[in] kernel_size Kernel size. + * @param[in] stride_x Stride of the operation in the x dimension. + * @param[in] stride_y Stride of the operation in the y dimension. + * @param[in] pad_x Padding size in the x dimension. + * @param[in] pad_y Padding size in the y dimension. + * @param[in] round_type Dimensions rounding mode. + * + * @return A pair with the new width in the first position and the new height in the second. + */ +const std::pair scaled_dimensions(unsigned int width, unsigned int height, unsigned int kernel_size, + unsigned int stride_x, unsigned int stride_y, + unsigned int pad_x, unsigned int pad_y, + DimensionRoundingType round_type); + +/** Convert a tensor format into a string. + * + * @param[in] format @ref Format to be translated to string. + * + * @return The string describing the format. + */ +const std::string &string_from_format(Format format); + +/** Convert a channel identity into a string. + * + * @param[in] channel @ref Channel to be translated to string. + * + * @return The string describing the channel. + */ +const std::string &string_from_channel(Channel channel); + +/** Convert a data type identity into a string. + * + * @param[in] dt @ref DataType to be translated to string. + * + * @return The string describing the data type. + */ +const std::string &string_from_data_type(DataType dt); +/** Convert a matrix pattern into a string. + * + * @param[in] pattern @ref MatrixPattern to be translated to string. + * + * @return The string describing the matrix pattern. 
+ */ +const std::string &string_from_matrix_pattern(MatrixPattern pattern); +/** Translates a given activation function to a string. + * + * @param[in] act @ref ActivationLayerInfo::ActivationFunction to be translated to string. + * + * @return The string describing the activation function. + */ +const std::string &string_from_activation_func(ActivationLayerInfo::ActivationFunction act); +/** Translates a given non linear function to a string. + * + * @param[in] function @ref NonLinearFilterFunction to be translated to string. + * + * @return The string describing the non linear function. + */ +const std::string &string_from_non_linear_filter_function(NonLinearFilterFunction function); +/** Translates a given interpolation policy to a string. + * + * @param[in] policy @ref InterpolationPolicy to be translated to string. + * + * @return The string describing the interpolation policy. + */ +const std::string &string_from_interpolation_policy(InterpolationPolicy policy); +/** Translates a given border mode policy to a string. + * + * @param[in] border_mode @ref BorderMode to be translated to string. + * + * @return The string describing the border mode. + */ +const std::string &string_from_border_mode(BorderMode border_mode); +/** Translates a given normalization type to a string. + * + * @param[in] type @ref NormType to be translated to string. + * + * @return The string describing the normalization type. + */ +const std::string &string_from_norm_type(NormType type); +/** Lower a given string. + * + * @param[in] val Given string to lower. + * + * @return The lowered string + */ +std::string lower_string(const std::string &val); + +/** Check if a given data type is of floating point type + * + * @param[in] dt Input data type. + * + * @return True if data type is of floating point type, else false. 
+ */ +inline bool is_data_type_float(DataType dt) +{ + switch(dt) + { + case DataType::F16: + case DataType::F32: + return true; + default: + return false; + } +} + +/** Check if a given data type is of fixed point type + * + * @param[in] dt Input data type. + * + * @return True if data type is of fixed point type, else false. + */ +inline bool is_data_type_fixed_point(DataType dt) +{ + switch(dt) + { + case DataType::QS8: + case DataType::QS16: + return true; + default: + return false; + } +} + +/** Print consecutive elements to an output stream. + * + * @param[out] s Output stream to print the elements to. + * @param[in] ptr Pointer to print the elements from. + * @param[in] n Number of elements to print. + * @param[in] stream_width (Optional) Width of the stream. If set to 0 the element's width is used. Defaults to 0. + * @param[in] element_delim (Optional) Delimeter among the consecutive elements. Defaults to space delimeter + */ +template +void print_consecutive_elements_impl(std::ostream &s, const T *ptr, unsigned int n, int stream_width = 0, const std::string &element_delim = " ") +{ + using print_type = typename std::conditional::value, T, int>::type; + + for(unsigned int i = 0; i < n; ++i) + { + // Set stream width as it is not a "sticky" stream manipulator + if(stream_width != 0) + { + s.width(stream_width); + } + s << std::right << static_cast(ptr[i]) << element_delim; + } +} + +/** Identify the maximum width of n consecutive elements. + * + * @param[in] s The output stream which will be used to print the elements. Used to extract the stream format. + * @param[in] ptr Pointer to the elements. + * @param[in] n Number of elements. + * + * @return The maximum width of the elements. 
+ */ +template +int max_consecutive_elements_display_width_impl(std::ostream &s, const T *ptr, unsigned int n) +{ + using print_type = typename std::conditional::value, T, int>::type; + + int max_width = -1; + for(unsigned int i = 0; i < n; ++i) + { + std::stringstream ss; + ss.copyfmt(s); + ss << static_cast(ptr[i]); + max_width = std::max(max_width, ss.str().size()); + } + return max_width; +} + +/** Print consecutive elements to an output stream. + * + * @param[out] s Output stream to print the elements to. + * @param[in] dt Data type of the elements + * @param[in] ptr Pointer to print the elements from. + * @param[in] n Number of elements to print. + * @param[in] stream_width (Optional) Width of the stream. If set to 0 the element's width is used. Defaults to 0. + * @param[in] element_delim (Optional) Delimeter among the consecutive elements. Defaults to space delimeter + */ +void print_consecutive_elements(std::ostream &s, DataType dt, const uint8_t *ptr, unsigned int n, int stream_width, const std::string &element_delim = " "); + +/** Identify the maximum width of n consecutive elements. + * + * @param[in] s Output stream to print the elements to. + * @param[in] dt Data type of the elements + * @param[in] ptr Pointer to print the elements from. + * @param[in] n Number of elements to print. + * + * @return The maximum width of the elements. + */ +int max_consecutive_elements_display_width(std::ostream &s, DataType dt, const uint8_t *ptr, unsigned int n); +} +#endif /*__ARM_COMPUTE_UTILS_H__ */ diff --git a/arm_compute/core/Validate.h b/arm_compute/core/Validate.h new file mode 100644 index 0000000000..48eba70adf --- /dev/null +++ b/arm_compute/core/Validate.h @@ -0,0 +1,563 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_VALIDATE_H__ +#define __ARM_COMPUTE_VALIDATE_H__ + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/HOGInfo.h" +#include "arm_compute/core/IKernel.h" +#include "arm_compute/core/IMultiHOG.h" +#include "arm_compute/core/IMultiImage.h" +#include "arm_compute/core/ITensor.h" +#include "arm_compute/core/MultiImageInfo.h" +#include "arm_compute/core/Window.h" + +#include + +namespace arm_compute +{ +namespace detail +{ +/* Check whether two dimension objects differ. + * + * @param[in] dim1 First object to be compared. + * @param[in] dim2 Second object to be compared. + * @param[in] upper_dim The dimension from which to check. + * + * @return Return true if the two objects are different. 
+ */ +template +inline bool have_different_dimensions(const Dimensions &dim1, const Dimensions &dim2, unsigned int upper_dim) +{ + for(unsigned int i = upper_dim; i < arm_compute::Dimensions::num_max_dimensions; ++i) + { + if(dim1[i] != dim2[i]) + { + return true; + } + } + + return false; +} + +/** Functor to compare two @ref Dimensions objects and throw an error on mismatch. + * + * @param[in] dim Object to compare against. + * @param[in] function Function in which the error occured. + * @param[in] file File in which the error occured. + * @param[in] line Line in which the error occured. + */ +template +class compare_dimension +{ +public: + compare_dimension(const Dimensions &dim, const char *function, const char *file, int line) + : _dim{ dim }, _function{ function }, _file{ file }, _line{ line } + { + } + + /** Compare the given object against the stored one. + * + * @param[in] dim To be compared object. + */ + void operator()(const Dimensions &dim) + { + ARM_COMPUTE_ERROR_ON_LOC_MSG(have_different_dimensions(_dim, dim, 0), _function, _file, _line, + "Objects have different dimensions"); + } + +private: + const Dimensions &_dim; + const char *const _function; + const char *const _file; + const int _line; +}; +} // namespace detail +/** Throw an error if one of the pointers is a nullptr. + * + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] pointers Pointers to check against nullptr. + */ +template +void error_on_nullptr(const char *function, const char *file, const int line, Ts &&... pointers) +{ + auto is_nullptr = [&](const void *ptr) + { + ARM_COMPUTE_ERROR_ON_LOC(ptr == nullptr, function, file, line); + }; + + for_each(is_nullptr, std::forward(pointers)...); +} +#define ARM_COMPUTE_ERROR_ON_NULLPTR(...) 
::arm_compute::error_on_nullptr(__func__, __FILE__, __LINE__, __VA_ARGS__) + +/** Throw an error if the passed window is invalid. + * + * The subwindow is invalid if: + * - It is not a valid window. + * - Its dimensions don't match the full window's ones + * - The step for each of its dimension is not identical to the corresponding one of the full window. + * + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] full Full size window + * @param[in] win Window to validate. + */ +void error_on_mismatching_windows(const char *function, const char *file, const int line, + const Window &full, const Window &win); +#define ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(f, w) ::arm_compute::error_on_mismatching_windows(__func__, __FILE__, __LINE__, f, w) + +/** Throw an error if the passed subwindow is invalid. + * + * The subwindow is invalid if: + * - It is not a valid window. + * - It is not fully contained inside the full window + * - The step for each of its dimension is not identical to the corresponding one of the full window. + * + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] full Full size window + * @param[in] sub Sub-window to validate. + */ +void error_on_invalid_subwindow(const char *function, const char *file, const int line, + const Window &full, const Window &sub); +#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s) ::arm_compute::error_on_invalid_subwindow(__func__, __FILE__, __LINE__, f, s) + +/** Throw an error if the passed coordinates have too many dimensions. + * + * The coordinates have too many dimensions if any of the dimensions greater or equal to max_dim is different from 0. + * + * @param[in] function Function in which the error occurred. 
+ * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] pos Coordinates to validate + * @param[in] max_dim Maximum number of dimensions allowed. + */ +void error_on_coordinates_dimensions_gte(const char *function, const char *file, const int line, + const Coordinates &pos, unsigned int max_dim); +#define ARM_COMPUTE_ERROR_ON_COORDINATES_DIMENSIONS_GTE(p, md) ::arm_compute::error_on_coordinates_dimensions_gte(__func__, __FILE__, __LINE__, p, md) + +/** Throw an error if the passed window has too many dimensions. + * + * The window has too many dimensions if any of the dimension greater or equal to max_dim is different from 0. + * + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] win Window to validate + * @param[in] max_dim Maximum number of dimensions allowed. + */ +void error_on_window_dimensions_gte(const char *function, const char *file, const int line, + const Window &win, unsigned int max_dim); +#define ARM_COMPUTE_ERROR_ON_WINDOW_DIMENSIONS_GTE(w, md) ::arm_compute::error_on_window_dimensions_gte(__func__, __FILE__, __LINE__, w, md) + +/** Throw an error if the passed dimension objects differ. + * + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] dim1 The first object to be compared. + * @param[in] dim2 The second object to be compared. + * @param[in] dims (Optional) Further allowed objects. + */ +template +void error_on_mismatching_dimensions(const char *function, const char *file, int line, + const Dimensions &dim1, const Dimensions &dim2, Ts &&... 
dims) +{ + ARM_COMPUTE_UNUSED(function); + ARM_COMPUTE_UNUSED(file); + ARM_COMPUTE_UNUSED(line); + + for_each(detail::compare_dimension(dim1, function, file, line), dim2, std::forward(dims)...); +} +#define ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(...) ::arm_compute::error_on_mismatching_dimensions(__func__, __FILE__, __LINE__, __VA_ARGS__) + +/** Throw an error if the passed two tensors have different shapes from the given dimension + * + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] tensor_1 The first tensor to be compared. + * @param[in] tensor_2 The second tensor to be compared. + * @param[in] tensors (Optional) Further allowed tensors. + */ +template +void error_on_mismatching_shapes(const char *function, const char *file, const int line, + const ITensor *tensor_1, const ITensor *tensor_2, Ts... tensors) +{ + error_on_mismatching_shapes(function, file, line, 0U, tensor_1, tensor_2, std::forward(tensors)...); +} + +/** Throw an error if the passed two tensors have different shapes from the given dimension + * + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] upper_dim The dimension from which to check. + * @param[in] tensor_1 The first tensor to be compared. + * @param[in] tensor_2 The second tensor to be compared. + * @param[in] tensors (Optional) Further allowed tensors. + */ +template +void error_on_mismatching_shapes(const char *function, const char *file, const int line, + unsigned int upper_dim, const ITensor *tensor_1, const ITensor *tensor_2, Ts... tensors) +{ + ARM_COMPUTE_UNUSED(function); + ARM_COMPUTE_UNUSED(file); + ARM_COMPUTE_UNUSED(line); + + const std::array < const ITensor *, 2 + sizeof...(Ts) > tensors_array{ { tensor_1, tensor_2, std::forward(tensors)... 
} }; + ARM_COMPUTE_UNUSED(tensors_array); + + ARM_COMPUTE_ERROR_ON_LOC(tensors_array.cbegin() == nullptr, function, file, line); + + ARM_COMPUTE_ERROR_ON_LOC_MSG(std::any_of(std::next(tensors_array.cbegin()), tensors_array.cend(), [&](const ITensor * tensor) + { + ARM_COMPUTE_ERROR_ON_LOC(tensor == nullptr, function, file, line); + return detail::have_different_dimensions((*tensors_array.cbegin())->info()->tensor_shape(), tensor->info()->tensor_shape(), upper_dim); + }), + function, file, line, "Tensors have different shapes"); +} +#define ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(...) ::arm_compute::error_on_mismatching_shapes(__func__, __FILE__, __LINE__, __VA_ARGS__) + +/** Throw an error if the passed two tensors have different data types + * + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] tensor_1 The first tensor to be compared. + * @param[in] tensor_2 The second tensor to be compared. + * @param[in] tensors (Optional) Further allowed tensors. + */ +template +void error_on_mismatching_data_types(const char *function, const char *file, const int line, + const ITensor *tensor_1, const ITensor *tensor_2, Ts... tensors) +{ + ARM_COMPUTE_UNUSED(function); + ARM_COMPUTE_UNUSED(file); + ARM_COMPUTE_UNUSED(line); + ARM_COMPUTE_UNUSED(tensor_1); + ARM_COMPUTE_UNUSED(tensor_2); + + DataType &&first_data_type = tensor_1->info()->data_type(); + ARM_COMPUTE_UNUSED(first_data_type); + + const std::array tensors_array{ { std::forward(tensors)... 
} }; + ARM_COMPUTE_UNUSED(tensors_array); + + ARM_COMPUTE_ERROR_ON_LOC_MSG(tensor_2->info()->data_type() != first_data_type || std::any_of(tensors_array.begin(), tensors_array.end(), [&](const ITensor * tensor) + { + return tensor->info()->data_type() != first_data_type; + }), + function, file, line, "Tensors have different data types"); +} + +#define ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(...) ::arm_compute::error_on_mismatching_data_types(__func__, __FILE__, __LINE__, __VA_ARGS__) + +/** Throw an error if the passed tensors have different fixed point data types or different fixed point positions + * + * @note: If the first tensor doesn't have fixed point data type, the function returns without throwing an error + * + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] tensor_1 The first tensor to be compared. + * @param[in] tensor_2 The second tensor to be compared. + * @param[in] tensors (Optional) Further allowed tensors. + */ +template +void error_on_mismatching_fixed_point(const char *function, const char *file, const int line, + const ITensor *tensor_1, const ITensor *tensor_2, Ts... tensors) +{ + ARM_COMPUTE_UNUSED(function); + ARM_COMPUTE_UNUSED(file); + ARM_COMPUTE_UNUSED(line); + ARM_COMPUTE_UNUSED(tensor_1); + ARM_COMPUTE_UNUSED(tensor_2); + + DataType &&first_data_type = tensor_1->info()->data_type(); + const int first_fixed_point_position = tensor_1->info()->fixed_point_position(); + ARM_COMPUTE_UNUSED(first_data_type); + ARM_COMPUTE_UNUSED(first_fixed_point_position); + + if((first_data_type != DataType::QS8) && (first_data_type != DataType::QS16)) + { + return; + } + + const std::array < const ITensor *, 1 + sizeof...(Ts) > tensors_array{ { tensor_2, std::forward(tensors)... 
} }; + ARM_COMPUTE_UNUSED(tensors_array); + + ARM_COMPUTE_ERROR_ON_LOC_MSG(std::any_of(tensors_array.begin(), tensors_array.end(), [&](const ITensor * tensor) + { + return tensor->info()->data_type() != first_data_type; + }), + function, file, line, "Tensors have different fixed point data types"); + + ARM_COMPUTE_ERROR_ON_LOC_MSG(std::any_of(tensors_array.begin(), tensors_array.end(), [&](const ITensor * tensor) + { + return tensor->info()->fixed_point_position() != first_fixed_point_position; + }), + function, file, line, "Tensors have different fixed point positions"); +} + +#define ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(...) ::arm_compute::error_on_mismatching_fixed_point(__func__, __FILE__, __LINE__, __VA_ARGS__) + +/** Throw an error if the format of the passed tensor/multi-image does not match any of the formats provided. + * + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] object Tensor/multi-image to validate. + * @param[in] format First format allowed. + * @param[in] formats (Optional) Further allowed formats. + */ +template +void error_on_format_not_in(const char *function, const char *file, const int line, + const T *object, F &&format, Fs &&... formats) +{ + ARM_COMPUTE_ERROR_ON_LOC(object == nullptr, function, file, line); + + Format &&object_format = object->info()->format(); + ARM_COMPUTE_UNUSED(object_format); + + ARM_COMPUTE_ERROR_ON_LOC(object_format == Format::UNKNOWN, function, file, line); + + const std::array formats_array{ { std::forward(formats)... 
} }; + ARM_COMPUTE_UNUSED(formats_array); + + ARM_COMPUTE_ERROR_ON_LOC_MSG(object_format != format && std::none_of(formats_array.begin(), formats_array.end(), [&](const F & f) + { + return f == object_format; + }), + function, file, line, "Format %s not supported by this kernel", string_from_format(object_format).c_str()); +} +#define ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(t, ...) ::arm_compute::error_on_format_not_in(__func__, __FILE__, __LINE__, t, __VA_ARGS__) + +/** Throw an error if the data type of the passed tensor does not match any of the data types provided. + * + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] tensor Tensor to validate. + * @param[in] dt First data type allowed. + * @param[in] dts (Optional) Further allowed data types. + */ +template +void error_on_data_type_not_in(const char *function, const char *file, const int line, + const ITensor *tensor, T &&dt, Ts &&... dts) +{ + ARM_COMPUTE_ERROR_ON_LOC(tensor == nullptr, function, file, line); + + const DataType &tensor_dt = tensor->info()->data_type(); //NOLINT + ARM_COMPUTE_UNUSED(tensor_dt); + + ARM_COMPUTE_ERROR_ON_LOC(tensor_dt == DataType::UNKNOWN, function, file, line); + + const std::array dts_array{ { std::forward(dts)... } }; + ARM_COMPUTE_UNUSED(dts_array); + + ARM_COMPUTE_ERROR_ON_LOC_MSG(tensor_dt != dt && std::none_of(dts_array.begin(), dts_array.end(), [&](const T & d) + { + return d == tensor_dt; + }), + function, file, line, "ITensor data type %s not supported by this kernel", string_from_data_type(tensor_dt).c_str()); +} +#define ARM_COMPUTE_ERROR_ON_DATA_TYPE_NOT_IN(t, ...) ::arm_compute::error_on_data_type_not_in(__func__, __FILE__, __LINE__, t, __VA_ARGS__) + +/** Throw an error if the data type or the number of channels of the passed tensor does not match any of the data types and number of channels provided. 
+ * + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] tensor Tensor to validate. + * @param[in] num_channels Number of channels to check + * @param[in] dt First data type allowed. + * @param[in] dts (Optional) Further allowed data types. + */ +template +void error_on_data_type_channel_not_in(const char *function, const char *file, const int line, + const ITensor *tensor, size_t num_channels, T &&dt, Ts &&... dts) +{ + error_on_data_type_not_in(function, file, line, tensor, std::forward(dt), std::forward(dts)...); + + const size_t tensor_nc = tensor->info()->num_channels(); + ARM_COMPUTE_UNUSED(tensor_nc); + + ARM_COMPUTE_ERROR_ON_LOC_MSG(tensor_nc != num_channels, function, file, line, "Number of channels %d. Required number of channels %d", tensor_nc, num_channels); +} +#define ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c, ...) ::arm_compute::error_on_data_type_channel_not_in(__func__, __FILE__, __LINE__, t, c, __VA_ARGS__) + +/** Throw an error if the tensor is not 2D. + * + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] tensor Tensor to validate. + */ +void error_on_tensor_not_2d(const char *function, const char *file, const int line, + const ITensor *tensor); +#define ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(t) ::arm_compute::error_on_tensor_not_2d(__func__, __FILE__, __LINE__, t) + +/** Throw an error if the channel is not in channels. + * + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] cn Input channel + * @param[in] channel First channel allowed. + * @param[in] channels (Optional) Further allowed channels. 
+ */ +template +void error_on_channel_not_in(const char *function, const char *file, const int line, + T cn, T &&channel, Ts &&... channels) +{ + ARM_COMPUTE_ERROR_ON_LOC(cn == Channel::UNKNOWN, function, file, line); + + const std::array channels_array{ { std::forward(channels)... } }; + ARM_COMPUTE_UNUSED(channels_array); + ARM_COMPUTE_ERROR_ON_LOC(channel != cn && std::none_of(channels_array.begin(), channels_array.end(), [&](const T & f) + { + return f == cn; + }), + function, file, line); +} +#define ARM_COMPUTE_ERROR_ON_CHANNEL_NOT_IN(c, ...) ::arm_compute::error_on_channel_not_in(__func__, __FILE__, __LINE__, c, __VA_ARGS__) + +/** Throw an error if the channel is not in format. + * + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] fmt Input channel + * @param[in] cn First channel allowed. + */ +void error_on_channel_not_in_known_format(const char *function, const char *file, const int line, + Format fmt, Channel cn); +#define ARM_COMPUTE_ERROR_ON_CHANNEL_NOT_IN_KNOWN_FORMAT(f, c) ::arm_compute::error_on_channel_not_in_known_format(__func__, __FILE__, __LINE__, f, c) + +/** Throw an error if the @ref IMultiHOG container is invalid + * + * An @ref IMultiHOG container is invalid if: + * + * -# it is a nullptr + * -# it doesn't contain models + * -# it doesn't have the HOG data objects with the same phase_type, normalization_type and l2_hyst_threshold (if normalization_type == L2HYS_NORM) + * + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. 
+ * @param[in] multi_hog IMultiHOG container to validate + */ +void error_on_invalid_multi_hog(const char *function, const char *file, const int line, + const IMultiHOG *multi_hog); +#define ARM_COMPUTE_ERROR_ON_INVALID_MULTI_HOG(m) ::arm_compute::error_on_invalid_multi_hog(__func__, __FILE__, __LINE__, m) + +/** Throw an error if the kernel is not configured. + * + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] kernel Kernel to validate. + */ +void error_on_unconfigured_kernel(const char *function, const char *file, const int line, + const IKernel *kernel); +#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k) ::arm_compute::error_on_unconfigured_kernel(__func__, __FILE__, __LINE__, k) + +/** Throw an error if if the coordinates and shape of the subtensor are within the parent tensor. + * + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] parent_shape Parent tensor shape + * @param[in] coords Coordinates inside the parent tensor where the first element of the subtensor is + * @param[in] shape Shape of the subtensor + */ +void error_on_invalid_subtensor(const char *function, const char *file, const int line, + const TensorShape &parent_shape, const Coordinates &coords, const TensorShape &shape); +#define ARM_COMPUTE_ERROR_ON_INVALID_SUBTENSOR(p, c, s) ::arm_compute::error_on_invalid_subtensor(__func__, __FILE__, __LINE__, p, c, s) + +/** Throw an error if the valid region of a subtensor is not inside the valid region of the parent tensor. + * + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] parent_valid_region Parent valid region. 
+ * @param[in] valid_region        Valid region of subtensor.
+ */
+void error_on_invalid_subtensor_valid_region(const char *function, const char *file, const int line,
+                                             const ValidRegion &parent_valid_region, const ValidRegion &valid_region);
+#define ARM_COMPUTE_ERROR_ON_INVALID_SUBTENSOR_VALID_REGION(pv, sv) ::arm_compute::error_on_invalid_subtensor_valid_region(__func__, __FILE__, __LINE__, pv, sv)
+
+/** Throw an error if the input fixed-point positions are different.
+ *
+ * @param[in] function Function in which the error occurred.
+ * @param[in] file     Name of the file where the error occurred.
+ * @param[in] line     Line on which the error occurred.
+ * @param[in] tensor_1 The first tensor to be compared.
+ * @param[in] tensor_2 The second tensor to be compared.
+ * @param[in] tensors  (Optional) Further allowed tensors.
+ */
+template <typename... Ts>
+void error_on_mismatching_fixed_point_position(const char *function, const char *file, const int line,
+                                               const ITensor *tensor_1, const ITensor *tensor_2, Ts... tensors)
+{
+    const std::array < const ITensor *, 1 + sizeof...(Ts) > tensors_array{ { tensor_2, std::forward<Ts>(tensors)... } };
+    ARM_COMPUTE_UNUSED(tensors_array);
+
+    ARM_COMPUTE_ERROR_ON_LOC_MSG(std::any_of(tensors_array.begin(), tensors_array.end(), [&](const ITensor * tensor)
+    {
+        return tensor->info()->fixed_point_position() != tensor_1->info()->fixed_point_position();
+    }),
+    function, file, line, "Tensors have different fixed-point positions");
+}
+#define ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(...) ::arm_compute::error_on_mismatching_fixed_point_position(__func__, __FILE__, __LINE__, __VA_ARGS__)
+
+/** Throw an error if the fixed-point value is not representable in the specified Q format.
+ *
+ * @param[in] function Function in which the error occurred.
+ * @param[in] file     Name of the file where the error occurred.
+ * @param[in] line     Line on which the error occurred.
+ * @param[in] value    The floating point value to be checked.
+ * @param[in] tensor   Input tensor that has information on data type and fixed-point position.
+ */
+template <typename T>
+void error_on_value_not_representable_in_fixed_point(const char *function, const char *file, int line,
+                                                     float value, const ITensor *tensor)
+{
+    const int          fixed_point_position = tensor->info()->fixed_point_position();
+    const DataType     dt                   = tensor->info()->data_type();
+    const unsigned int q_max_range          = 0xFFFFFFFFu >> (((sizeof(unsigned int) - element_size_from_data_type(dt)) * 8) + 1);
+    const float        max_range            = q_max_range / (static_cast<float>(1 << fixed_point_position));
+    ARM_COMPUTE_UNUSED(max_range);
+
+    ARM_COMPUTE_ERROR_ON_LOC_MSG(value > max_range, function, file, line,
+                                 "Value %f is not representable in %s with fixed-point position %d", value, string_from_data_type(dt).c_str(), fixed_point_position);
+}
+#define ARM_COMPUTE_ERROR_ON_VALUE_NOT_REPRESENTABLE_IN_FIXED_POINT(...) ::arm_compute::error_on_value_not_representable_in_fixed_point(__func__, __FILE__, __LINE__, __VA_ARGS__)
+}
+#endif /* __ARM_COMPUTE_VALIDATE_H__*/
diff --git a/arm_compute/core/Window.h b/arm_compute/core/Window.h
new file mode 100644
index 0000000000..6e7ef22531
--- /dev/null
+++ b/arm_compute/core/Window.h
@@ -0,0 +1,355 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_WINDOW_H__
+#define __ARM_COMPUTE_WINDOW_H__
+
+#include <algorithm>
+#include <array>
+#include <cstddef>
+
+#include "arm_compute/core/Coordinates.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/ITensorInfo.h"
+#include "arm_compute/core/Utils.h"
+
+namespace arm_compute
+{
+/** Describe a multidimensional execution window. */
+class Window
+{
+public:
+    /** Alias for dimension 0 also known as X dimension */
+    static constexpr size_t DimX = 0;
+    /** Alias for dimension 1 also known as Y dimension */
+    static constexpr size_t DimY = 1;
+    /** Alias for dimension 2 also known as Z dimension */
+    static constexpr size_t DimZ = 2;
+
+    /** Default constructor: create a window containing a single element. */
+    constexpr Window()
+        : _dims(), _thread_id(0), _num_threads(1)
+    {
+    }
+    /** Copy constructor
+     *
+     * @param[in] src Copy the values from src to a new object
+     */
+    Window(const Window &src);
+
+    /** Describe one of the image's dimensions with a start, end and step.
+     *
+     * Iteration through the elements of the dimension is done like this:
+     * for(int v = start(); v < end(); v += step())
+     * {
+     *   ...
+     * }
+     */
+    class Dimension
+    {
+    public:
+        /** Constructor, by default creates a dimension of 1.
+         *
+         * @param[in] start Start of the dimension
+         * @param[in] end   End of the dimension
+         * @param[in] step  Step between two elements of the dimension when iterating.
+ * + */ + constexpr Dimension(int start = 0, int end = 1, int step = 1) + : _start(start), _end(end), _step(step) + { + } + /** Default assignment operator to allow dimensions to be copied */ + Dimension &operator=(const Dimension &d) = default; + /** Return the start of the dimension */ + constexpr int start() const + { + return _start; + } + /** Return the end of the dimension */ + constexpr int end() const + { + return _end; + } + /** Return the step of the dimension */ + constexpr int step() const + { + return _step; + } + /** Set the dimension's step + * + * @param[in] step The new step + */ + void set_step(int step) + { + _step = step; + } + + private: + int _start; /**< Start of the dimension */ + int _end; /**< End of the dimension */ + int _step; + }; + + /** Read only access to a given dimension of the window + * + * @note Precondition: dimension < Coordinates::num_max_dimensions + * + * @param[in] dimension The dimension to access + * + * @return The requested dimension + */ + constexpr const Dimension &operator[](size_t dimension) const; + + /** Alias to access the first dimension of the window + * + * @return First dimension of the window + */ + constexpr const Dimension &x() const + { + return _dims.at(Window::DimX); + } + + /** Alias to access the second dimension of the window + * + * @return Second dimension of the window + */ + constexpr const Dimension &y() const + { + return _dims.at(Window::DimY); + } + + /** Alias to access the third dimension of the window + * + * @return Third dimension of the window + */ + constexpr const Dimension &z() const + { + return _dims.at(Window::DimZ); + } + + /** Set the values of a given dimension + * + * @param[in] dimension The dimension to set + * @param[in] dim The values to set the dimension to + */ + void set(size_t dimension, const Dimension &dim); + + /** Use the tensor's dimensions to fill the window dimensions. + * + * @param[in] info Tensor information to copy the dimensions from. 
+ * @param[in] first_dimension Only copy dimensions which are greater or equal to this value. + */ + void use_tensor_dimensions(const ITensorInfo *info, size_t first_dimension = Window::DimX); + + /** Shift the values of a given dimension by the given shift_value + * + * @param[in] dimension The dimension to shift + * @param[in] shift_value Value to shift the start and end values of. + */ + void shift(size_t dimension, int shift_value); + + /** Adjust the start or end of a given dimension by the given value + * + * @param[in] dimension The dimension to adjust + * @param[in] adjust_value The adjusted value. + * @param[in] is_at_start The flag to indicate whether adjust the start or end of the dimension. + */ + void adjust(size_t dimension, int adjust_value, bool is_at_start); + + /** Scale the values of a given dimension by the given scale_value + * + * @note The end of the window is rounded up to be a multiple of step after the scaling. + * + * @param[in] dimension The dimension to scale + * @param[in] scale_value Value to scale the start, end and step values of. + */ + void scale(size_t dimension, float scale_value); + + /** Set the step of a given dimension. + * + * @param[in] dimension Dimension to update + * @param[in] step The new dimension's step value + */ + void set_dimension_step(size_t dimension, int step); + + /** Will validate all the window's dimensions' values when asserts are enabled + * + * No-op when asserts are disabled + */ + void validate() const; + + /** Return the number of iterations needed to iterate through a given dimension + * + * @param[in] dimension The requested dimension + * + * @return The number of iterations + */ + constexpr size_t num_iterations(size_t dimension) const; + + /** Split a window into a set of sub windows along a given dimension + * + * For example to split a window into 3 sub-windows along the Y axis, you would have to do:
+ * Window sub0 = window.split_window( 1, 0, 3);
+ * Window sub1 = window.split_window( 1, 1, 3);
+ * Window sub2 = window.split_window( 1, 2, 3);
+ * + * @param[in] dimension Dimension along which the split will be performed + * @param[in] id Id of the sub-window to return. Must be in the range (0, total-1) + * @param[in] total Total number of sub-windows the window will be split into. + * + * @return The subwindow "id" out of "total" + */ + Window split_window(size_t dimension, size_t id, size_t total) const; + /** First 1D slice of the window + * + * @return The first slice of the window. + */ + Window first_slice_window_1D() const + { + return first_slice_window<1>(); + }; + /** First 2D slice of the window + * + * @return The first slice of the window. + */ + Window first_slice_window_2D() const + { + return first_slice_window<2>(); + }; + /** First 3D slice of the window + * + * @return The first slice of the window. + */ + Window first_slice_window_3D() const + { + return first_slice_window<3>(); + }; + /** Slide the passed 1D window slice. + * + * If slice contains the last slice then it will remain unchanged and false will be returned. + * + * @param[in,out] slice Current slice, to be updated to the next slice. + * + * @return true if slice contains a new slice, false if slice already contained the last slice + */ + bool slide_window_slice_1D(Window &slice) const + { + return slide_window_slice<1>(slice); + } + /** Slide the passed 2D window slice. + * + * If slice contains the last slice then it will remain unchanged and false will be returned. + * + * @param[in,out] slice Current slice, to be updated to the next slice. + * + * @return true if slice contains a new slice, false if slice already contained the last slice + */ + bool slide_window_slice_2D(Window &slice) const + { + return slide_window_slice<2>(slice); + } + /** Slide the passed 3D window slice. + * + * If slice contains the last slice then it will remain unchanged and false will be returned. + * + * @param[in,out] slice Current slice, to be updated to the next slice. 
+     *
+     * @return true if slice contains a new slice, false if slice already contained the last slice
+     */
+    bool slide_window_slice_3D(Window &slice) const
+    {
+        return slide_window_slice<3>(slice);
+    }
+    /** Slide the passed 4D window slice.
+     *
+     * If slice contains the last slice then it will remain unchanged and false will be returned.
+     *
+     * @param[in,out] slice Current slice, to be updated to the next slice.
+     *
+     * @return true if slice contains a new slice, false if slice already contained the last slice
+     */
+    bool slide_window_slice_4D(Window &slice) const
+    {
+        return slide_window_slice<4>(slice);
+    }
+    /** Sets the ID of the thread that the window is associated with.
+     *
+     * @param id ID of the thread that the window is associated with.
+     */
+    void set_thread_id(unsigned int id)
+    {
+        _thread_id = id;
+    }
+    /** Sets the number of threads dispatched that the window is associated with.
+     *
+     * @param num_threads The number of threads dispatched that the window is associated with.
+     */
+    void set_num_threads(unsigned int num_threads)
+    {
+        _num_threads = num_threads;
+    }
+    /** Get the ID of the thread that the window is associated with.
+     *
+     * @return ID of the thread that the window is associated with.
+     */
+    constexpr unsigned int thread_id() const
+    {
+        return _thread_id;
+    }
+    /** Get the number of threads dispatched that the window is associated with.
+     *
+     * @return The number of threads dispatched that the window is associated with.
+     */
+    constexpr unsigned int num_threads() const
+    {
+        return _num_threads;
+    }
+
+private:
+    /** First slice of the window
+     *
+     * @return The first slice of the window.
+     */
+    template <unsigned int window_dimension>
+    Window first_slice_window() const;
+
+    /** Slide the passed window slice.
+     *
+     * If slice contains the last slice then it will remain unchanged and false will be returned.
+     *
+     * @param[in,out] slice Current slice, to be updated to the next slice.
+     *
+     * @return true if slice contains a new slice, false if slice already contained the last slice
+     */
+    template <unsigned int window_dimension>
+    bool slide_window_slice(Window &slice) const;
+
+private:
+    std::array<Dimension, Coordinates::num_max_dimensions> _dims;
+    unsigned int _thread_id;
+    unsigned int _num_threads;
+};
+}
+#include "Window.inl"
+#endif /*__ARM_COMPUTE_WINDOW_H__ */
diff --git a/arm_compute/core/Window.inl b/arm_compute/core/Window.inl
new file mode 100644
index 0000000000..75428a145b
--- /dev/null
+++ b/arm_compute/core/Window.inl
@@ -0,0 +1,182 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */ +namespace arm_compute +{ +inline Window::Window(const Window &src) + : _dims(), _thread_id(src._thread_id), _num_threads(src._num_threads) +{ + for(size_t i = 0; i < Coordinates::num_max_dimensions; ++i) + { + set(i, src[i]); + } +} + +inline constexpr const Window::Dimension &Window::operator[](const size_t dimension) const +{ + // Precondition: dimension < Coordinates::num_max_dimensions + return _dims.at(dimension); +} +inline void Window::set(const size_t dimension, const Window::Dimension &dim) +{ + ARM_COMPUTE_ERROR_ON(dimension >= Coordinates::num_max_dimensions); + _dims[dimension] = dim; +} + +inline void Window::shift(const size_t dimension, const int shift_value) +{ + ARM_COMPUTE_ERROR_ON(dimension >= Coordinates::num_max_dimensions); + Window::Dimension &d = _dims[dimension]; + d = Window::Dimension(d.start() + shift_value, d.end() + shift_value, d.step()); +} + +inline void Window::adjust(size_t dimension, int adjust_value, bool is_at_start) +{ + ARM_COMPUTE_ERROR_ON(dimension >= Coordinates::num_max_dimensions); + Window::Dimension &d = _dims[dimension]; + + if(is_at_start) + { + d = Window::Dimension(d.start() + adjust_value, d.end(), d.step()); + } + else + { + d = Window::Dimension(d.start(), d.end() + adjust_value, d.step()); + } +} + +inline void Window::scale(const size_t dimension, float scale_value) +{ + ARM_COMPUTE_ERROR_ON(dimension >= Coordinates::num_max_dimensions); + Window::Dimension &d = _dims[dimension]; + const int scaled_step = d.step() * scale_value; + const int scaled_end = ceil_to_multiple(d.end() * scale_value, scaled_step); + d = Window::Dimension(d.start() * scale_value, scaled_end, scaled_step); +} + +inline void Window::set_dimension_step(const size_t dimension, const int step) +{ + ARM_COMPUTE_ERROR_ON(dimension >= Coordinates::num_max_dimensions); + _dims[dimension].set_step(step); +} + +inline void Window::validate() const +{ + for(size_t i = 0; i < Coordinates::num_max_dimensions; ++i) + { + 
ARM_COMPUTE_ERROR_ON(_dims[i].step() == 0);
+        ARM_COMPUTE_ERROR_ON(_dims[i].end() <= _dims[i].start());
+        ARM_COMPUTE_ERROR_ON((_dims[i].end() - _dims[i].start()) % _dims[i].step());
+    }
+}
+
+inline constexpr size_t Window::num_iterations(size_t dimension) const
+{
+    // Precondition: dimension < Coordinates::num_max_dimensions
+    // Precondition: (end - start) % step == 0
+    return (_dims.at(dimension).end() - _dims.at(dimension).start()) / _dims.at(dimension).step();
+}
+
+inline Window Window::split_window(const size_t dimension, const size_t id, const size_t total) const
+{
+    ARM_COMPUTE_ERROR_ON(id >= total);
+    ARM_COMPUTE_ERROR_ON(dimension >= Coordinates::num_max_dimensions);
+
+    Window out;
+
+    for(size_t d = 0; d < Coordinates::num_max_dimensions; ++d)
+    {
+        if(d == dimension)
+        {
+            int start          = _dims[d].start();
+            int end            = _dims[d].end();
+            int per_sub_window = (num_iterations(d) / total) * _dims[d].step();
+
+            start += id * per_sub_window;
+
+            if(id != total - 1)
+            {
+                end = start + per_sub_window;
+            }
+
+            out.set(d, Dimension(start, end, _dims[d].step()));
+        }
+        else
+        {
+            out.set(d, _dims[d]);
+        }
+    }
+
+    return out;
+}
+
+template <unsigned int window_dimension>
+inline bool Window::slide_window_slice(Window &slice) const
+{
+    for(unsigned int n = window_dimension; n < Coordinates::num_max_dimensions; ++n)
+    {
+        // Did we reach the end of this dimension?
+        const int v = slice._dims[n].start() + 1;
+
+        if(v < _dims[n].end())
+        {
+            // No: increment
+            slice._dims[n] = Dimension(v, v + 1, 1);
+
+            // Reset lower dimensions:
+            for(unsigned int lower = window_dimension; lower < n; ++lower)
+            {
+                slice._dims[lower] = Dimension(_dims[lower].start(), _dims[lower].start() + 1, 1);
+            }
+            return true;
+        }
+    }
+
+    // It was the last slice
+    return false; // Iteration over
+}
+
+template <unsigned int window_dimension>
+inline Window Window::first_slice_window() const
+{
+    Window slice;
+
+    std::copy_n(_dims.begin(), window_dimension, slice._dims.begin());
+
+    //Initialise higher dimensions to be the first slice.
+ for(unsigned int n = window_dimension; n < Coordinates::num_max_dimensions; ++n) + { + slice._dims[n] = Dimension(_dims[n].start(), _dims[n].start() + 1, 1); + } + + return slice; +} + +inline void Window::use_tensor_dimensions(const ITensorInfo *info, const size_t first_dimension) +{ + for(unsigned int n = first_dimension; n < info->num_dimensions(); ++n) + { + set(n, Window::Dimension(0, std::max(info->dimension(n), static_cast(1)))); + } +} +} diff --git a/arm_compute/runtime/Array.h b/arm_compute/runtime/Array.h new file mode 100644 index 0000000000..c8a240e428 --- /dev/null +++ b/arm_compute/runtime/Array.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+#ifndef __ARM_COMPUTE_ARRAY_H__
+#define __ARM_COMPUTE_ARRAY_H__
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/IArray.h"
+#include "arm_compute/core/Types.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+/** Basic implementation of the IArray interface which allocates a static number of T values */
+template <class T>
+class Array : public IArray<T>
+{
+public:
+    /** Default constructor: empty array */
+    Array()
+        : IArray<T>(0), _values(nullptr)
+    {
+    }
+    /** Constructor: initializes an array which can contain up to max_num_points values
+     *
+     * @param[in] max_num_values Maximum number of values the array will be able to stored
+     */
+    Array(size_t max_num_values)
+        : IArray<T>(max_num_values), _values(arm_compute::cpp14::make_unique<T[]>(max_num_values))
+    {
+    }
+
+    // Inherited methods overridden:
+    T *buffer() const override
+    {
+        return _values.get();
+    }
+
+private:
+    std::unique_ptr<T[]> _values;
+};
+
+using KeyPointArray        = Array<KeyPoint>;
+using Coordinates2DArray   = Array<Coordinates2D>;
+using DetectionWindowArray = Array<DetectionWindow>;
+using Size2DArray          = Array<Size2D>;
+using UInt8Array           = Array<uint8_t>;
+using UInt16Array          = Array<uint16_t>;
+using UInt32Array          = Array<uint32_t>;
+using Int16Array           = Array<int16_t>;
+using Int32Array           = Array<int32_t>;
+using FloatArray           = Array<float>;
+}
+#endif /* __ARM_COMPUTE_ARRAY_H__ */
diff --git a/arm_compute/runtime/CL/CLArray.h b/arm_compute/runtime/CL/CLArray.h
new file mode 100644
index 0000000000..f4c2ef06d9
--- /dev/null
+++ b/arm_compute/runtime/CL/CLArray.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+#ifndef __ARM_COMPUTE_CLARRAY_H__
+#define __ARM_COMPUTE_CLARRAY_H__
+
+#include "arm_compute/core/CL/ICLArray.h"
+#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+
+namespace arm_compute
+{
+/** CLArray implementation */
+template <class T>
+class CLArray : public ICLArray<T>
+{
+public:
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLArray(const CLArray &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    const CLArray &operator=(const CLArray &) = delete;
+    /** Constructor: initializes an array which can contain up to max_num_points values
+     *
+     * @param[in] max_num_values Maximum number of values the array will be able to stored
+     */
+    CLArray(size_t max_num_values)
+        : ICLArray<T>(max_num_values), _buffer(cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, max_num_values * sizeof(T)))
+    {
+    }
+    /** Enqueue a map operation of the allocated buffer.
+     *
+     * @param[in] blocking If true, then the mapping will be ready to use by the time
+     *                     this method returns, else it is the caller's responsibility
+     *                     to flush the queue and wait for the mapping operation to have completed.
+     */
+    void map(bool blocking = true)
+    {
+        ICLArray<T>::map(CLScheduler::get().queue(), blocking);
+    }
+    using ICLArray<T>::map;
+    /** Enqueue an unmap operation of the allocated and mapped buffer.
+     *
+     * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before
+     *       the memory is accessed by the device.
+     */
+    void unmap()
+    {
+        ICLArray<T>::unmap(CLScheduler::get().queue());
+    }
+    using ICLArray<T>::unmap;
+
+    // Inherited methods overridden:
+    const cl::Buffer &cl_buffer() const override
+    {
+        return _buffer;
+    }
+
+protected:
+    // Inherited methods overridden:
+    uint8_t *do_map(cl::CommandQueue &q, bool blocking) override
+    {
+        ARM_COMPUTE_ERROR_ON(nullptr == _buffer.get());
+        return static_cast<uint8_t *>(q.enqueueMapBuffer(_buffer, blocking ? CL_TRUE : CL_FALSE, CL_MAP_READ | CL_MAP_WRITE, 0, this->max_num_values() * sizeof(T)));
+    }
+    void do_unmap(cl::CommandQueue &q, uint8_t *mapping) override
+    {
+        ARM_COMPUTE_ERROR_ON(nullptr == _buffer.get());
+        q.enqueueUnmapMemObject(_buffer, mapping);
+    }
+
+private:
+    cl::Buffer _buffer;
+};
+
+using CLKeyPointArray        = CLArray<KeyPoint>;
+using CLCoordinates2DArray   = CLArray<Coordinates2D>;
+using CLDetectionWindowArray = CLArray<DetectionWindow>;
+using CLSize2DArray          = CLArray<Size2D>;
+using CLUInt8Array           = CLArray<uint8_t>;
+using CLUInt16Array          = CLArray<uint16_t>;
+using CLUInt32Array          = CLArray<uint32_t>;
+using CLInt16Array           = CLArray<int16_t>;
+using CLInt32Array           = CLArray<int32_t>;
+using CLFloatArray           = CLArray<float>;
+}
+#endif /* __ARM_COMPUTE_CLARRAY_H__ */
diff --git a/arm_compute/runtime/CL/CLDistribution1D.h b/arm_compute/runtime/CL/CLDistribution1D.h
new file mode 100644
index 0000000000..55dd1247ed
--- /dev/null
+++ b/arm_compute/runtime/CL/CLDistribution1D.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLDISTRIBUTION1D_H__ +#define __ARM_COMPUTE_CLDISTRIBUTION1D_H__ + +#include "arm_compute/core/CL/ICLDistribution1D.h" +#include "arm_compute/core/CL/OpenCL.h" + +#include +#include + +namespace arm_compute +{ +/** CLDistribution1D object class */ +class CLDistribution1D : public ICLDistribution1D +{ +public: + /** Constructor: Creates a 1D CLDistribution of a consecutive interval [offset, offset + range - 1] + * defined by a start offset and valid range, divided equally into num_bins parts. + * + * @param[in] num_bins The number of bins the distribution is divided in. + * @param[in] offset The start of the values to use. + * @param[in] range The total number of the consecutive values of the distribution interval. 
+ */ + CLDistribution1D(size_t num_bins, int32_t offset, uint32_t range); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLDistribution1D(const CLDistribution1D &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLDistribution1D &operator=(const CLDistribution1D &) = delete; + /** Enqueue a map operation of the allocated buffer. + * + * @param[in] blocking If true, then the mapping will be ready to use by the time + * this method returns, else it is the caller's responsibility + * to flush the queue and wait for the mapping operation to have completed. + */ + void map(bool blocking = true); + using ICLDistribution1D::map; + /** Enqueue an unmap operation of the allocated and mapped buffer. + * + * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before + * the memory is accessed by the device. + */ + void unmap(); + using ICLDistribution1D::unmap; + + // Inherited methods overridden: + cl::Buffer &cl_buffer() override; + +protected: + // Inherited methods overridden: + uint32_t *do_map(cl::CommandQueue &q, bool blocking) override; + void do_unmap(cl::CommandQueue &q) override; + +private: + cl::Buffer _mem; +}; +} +#endif /* __ARM_COMPUTE_CLDISTRIBUTION1D_H__ */ diff --git a/arm_compute/runtime/CL/CLFunctions.h b/arm_compute/runtime/CL/CLFunctions.h new file mode 100644 index 0000000000..82929ba139 --- /dev/null +++ b/arm_compute/runtime/CL/CLFunctions.h @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLFUNCTIONS_H__ +#define __ARM_COMPUTE_CLFUNCTIONS_H__ + +/* Header regrouping all the CL functions */ +#include "arm_compute/runtime/CL/functions/CLAbsoluteDifference.h" +#include "arm_compute/runtime/CL/functions/CLAccumulate.h" +#include "arm_compute/runtime/CL/functions/CLActivationLayer.h" +#include "arm_compute/runtime/CL/functions/CLArithmeticAddition.h" +#include "arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h" +#include "arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h" +#include "arm_compute/runtime/CL/functions/CLBitwiseAnd.h" +#include "arm_compute/runtime/CL/functions/CLBitwiseNot.h" +#include "arm_compute/runtime/CL/functions/CLBitwiseOr.h" +#include "arm_compute/runtime/CL/functions/CLBitwiseXor.h" +#include "arm_compute/runtime/CL/functions/CLBox3x3.h" +#include "arm_compute/runtime/CL/functions/CLCannyEdge.h" +#include "arm_compute/runtime/CL/functions/CLChannelCombine.h" +#include "arm_compute/runtime/CL/functions/CLChannelExtract.h" +#include "arm_compute/runtime/CL/functions/CLColorConvert.h" +#include "arm_compute/runtime/CL/functions/CLConvolution.h" +#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h" +#include "arm_compute/runtime/CL/functions/CLDepthConcatenate.h" +#include "arm_compute/runtime/CL/functions/CLDepthConvert.h" +#include "arm_compute/runtime/CL/functions/CLDerivative.h" +#include "arm_compute/runtime/CL/functions/CLDilate.h" +#include "arm_compute/runtime/CL/functions/CLEqualizeHistogram.h" +#include "arm_compute/runtime/CL/functions/CLErode.h" +#include "arm_compute/runtime/CL/functions/CLFastCorners.h" +#include "arm_compute/runtime/CL/functions/CLFillBorder.h" +#include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h" +#include "arm_compute/runtime/CL/functions/CLGEMM.h" +#include "arm_compute/runtime/CL/functions/CLGEMMInterleave4x4.h" +#include "arm_compute/runtime/CL/functions/CLGEMMLowp.h" +#include 
"arm_compute/runtime/CL/functions/CLGaussian3x3.h" +#include "arm_compute/runtime/CL/functions/CLGaussian5x5.h" +#include "arm_compute/runtime/CL/functions/CLGaussianPyramid.h" +#include "arm_compute/runtime/CL/functions/CLHOGDescriptor.h" +#include "arm_compute/runtime/CL/functions/CLHOGDetector.h" +#include "arm_compute/runtime/CL/functions/CLHOGGradient.h" +#include "arm_compute/runtime/CL/functions/CLHOGMultiDetection.h" +#include "arm_compute/runtime/CL/functions/CLHarrisCorners.h" +#include "arm_compute/runtime/CL/functions/CLHistogram.h" +#include "arm_compute/runtime/CL/functions/CLIntegralImage.h" +#include "arm_compute/runtime/CL/functions/CLLaplacianPyramid.h" +#include "arm_compute/runtime/CL/functions/CLLaplacianReconstruct.h" +#include "arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h" +#include "arm_compute/runtime/CL/functions/CLMagnitude.h" +#include "arm_compute/runtime/CL/functions/CLMeanStdDev.h" +#include "arm_compute/runtime/CL/functions/CLMedian3x3.h" +#include "arm_compute/runtime/CL/functions/CLMinMaxLocation.h" +#include "arm_compute/runtime/CL/functions/CLNonLinearFilter.h" +#include "arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h" +#include "arm_compute/runtime/CL/functions/CLNormalizationLayer.h" +#include "arm_compute/runtime/CL/functions/CLOpticalFlow.h" +#include "arm_compute/runtime/CL/functions/CLPhase.h" +#include "arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h" +#include "arm_compute/runtime/CL/functions/CLPoolingLayer.h" +#include "arm_compute/runtime/CL/functions/CLRemap.h" +#include "arm_compute/runtime/CL/functions/CLScale.h" +#include "arm_compute/runtime/CL/functions/CLScharr3x3.h" +#include "arm_compute/runtime/CL/functions/CLSobel3x3.h" +#include "arm_compute/runtime/CL/functions/CLSobel5x5.h" +#include "arm_compute/runtime/CL/functions/CLSobel7x7.h" +#include "arm_compute/runtime/CL/functions/CLSoftmaxLayer.h" +#include "arm_compute/runtime/CL/functions/CLTableLookup.h" +#include 
"arm_compute/runtime/CL/functions/CLThreshold.h" +#include "arm_compute/runtime/CL/functions/CLTranspose.h" +#include "arm_compute/runtime/CL/functions/CLWarpAffine.h" +#include "arm_compute/runtime/CL/functions/CLWarpPerspective.h" + +#endif /* __ARM_COMPUTE_CLFUNCTIONS_H__ */ diff --git a/arm_compute/runtime/CL/CLHOG.h b/arm_compute/runtime/CL/CLHOG.h new file mode 100644 index 0000000000..9b4a303eca --- /dev/null +++ b/arm_compute/runtime/CL/CLHOG.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLHOG_H__ +#define __ARM_COMPUTE_CLHOG_H__ + +#include "arm_compute/core/CL/ICLHOG.h" +#include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/core/HOGInfo.h" +#include "arm_compute/core/Types.h" + +#include + +namespace arm_compute +{ +/** OpenCL implementation of HOG data-object */ +class CLHOG : public ICLHOG +{ +public: + /** Default constructor */ + CLHOG(); + /** Allocate the HOG descriptor using the given HOG's metadata + * + * @param[in] input HOG's metadata used to allocate the HOG descriptor + */ + void init(const HOGInfo &input); + + /** Enqueue a map operation of the allocated buffer. + * + * @param[in] blocking If true, then the mapping will be ready to use by the time + * this method returns, else it is the caller's responsibility + * to flush the queue and wait for the mapping operation to have completed. + */ + void map(bool blocking = true); + using ICLHOG::map; + + /** Enqueue an unmap operation of the allocated and mapped buffer. + * + * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before + * the memory is accessed by the device. + */ + void unmap(); + using ICLHOG::unmap; + + // Inherited method overridden: + void free() override; + const HOGInfo *info() const override; + const cl::Buffer &cl_buffer() const override; + +protected: + // Inherited methods overridden: + uint8_t *do_map(cl::CommandQueue &q, bool blocking) override; + void do_unmap(cl::CommandQueue &q) override; + +private: + HOGInfo _info; + cl::Buffer _buffer; +}; +} +#endif /* __ARM_COMPUTE_CLHOG_H__ */ diff --git a/arm_compute/runtime/CL/CLLut.h b/arm_compute/runtime/CL/CLLut.h new file mode 100644 index 0000000000..9bac2b44c3 --- /dev/null +++ b/arm_compute/runtime/CL/CLLut.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLLUT_H__ +#define __ARM_COMPUTE_CLLUT_H__ + +#include "arm_compute/core/CL/ICLLut.h" +#include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLLutAllocator.h" + +#include +#include + +namespace arm_compute +{ +class ILutAllocator; + +/** Basic implementation of the OpenCL lut interface */ +class CLLut : public ICLLut +{ +public: + /** Constructor */ + CLLut(); + /** Constructor: initializes a LUT which can contain num_values values of data_type type. + * + * @param[in] num_elements Number of elements of the LUT. + * @param[in] data_type Data type of each element. 
+ */ + CLLut(size_t num_elements, DataType data_type); + /** Return a pointer to the lut's allocator + * + * @return A pointer to the lut's allocator + */ + ILutAllocator *allocator(); + /** Enqueue a map operation of the allocated buffer. + * + * @param[in] blocking If true, then the mapping will be ready to use by the time + * this method returns, else it is the caller's responsibility + * to flush the queue and wait for the mapping operation to have completed. + */ + void map(bool blocking = true); + using ICLLut::map; + /** Enqueue an unmap operation of the allocated and mapped buffer. + * + * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before + * the memory is accessed by the device. + */ + void unmap(); + using ICLLut::unmap; + + // Inherited methods overridden: + size_t num_elements() const override; + uint32_t index_offset() const override; + size_t size_in_bytes() const override; + DataType type() const override; + const cl::Buffer &cl_buffer() const override; + void clear() override; + +protected: + // Inherited methods overridden: + uint8_t *do_map(cl::CommandQueue &q, bool blocking) override; + void do_unmap(cl::CommandQueue &q) override; + +private: + CLLutAllocator _allocator; /**< Instance of the OpenCL lut allocator */ +}; +} +#endif /*__ARM_COMPUTE_CLLUT_H__ */ diff --git a/arm_compute/runtime/CL/CLLutAllocator.h b/arm_compute/runtime/CL/CLLutAllocator.h new file mode 100644 index 0000000000..4648ffb51f --- /dev/null +++ b/arm_compute/runtime/CL/CLLutAllocator.h @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLLUTALLOCATOR_H__ +#define __ARM_COMPUTE_CLLUTALLOCATOR_H__ + +#include "arm_compute/runtime/ILutAllocator.h" + +#include "arm_compute/core/CL/OpenCL.h" + +#include + +namespace arm_compute +{ +/** Basic implementation of a CL memory LUT allocator. */ +class CLLutAllocator : public ILutAllocator +{ +public: + /** Default constructor. */ + CLLutAllocator(); + /** Default destructor. */ + ~CLLutAllocator() = default; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLLutAllocator(const CLLutAllocator &) = delete; + /** Prevent instances of this class from being copy assigned (As this class contains pointers). */ + const CLLutAllocator &operator=(const CLLutAllocator &) = delete; + /** Interface to be implemented by the child class to return the pointer to the mapped data. 
*/ + uint8_t *data(); + /** Interface to be implemented by the child class to return the pointer to the CL data. */ + const cl::Buffer &cl_data() const; + /** Enqueue a map operation of the allocated buffer on the given queue. + * + * @param[in,out] q The CL command queue to use for the mapping operation. + * @param[in] blocking If true, then the mapping will be ready to use by the time + * this method returns, else it is the caller's responsibility + * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer. + * + * @return The mapping address. + */ + uint8_t *map(cl::CommandQueue &q, bool blocking); + /** Enqueue an unmap operation of the allocated buffer on the given queue. + * + * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before + * the memory is accessed by the device. + * + * @param[in,out] q The CL command queue to use for the mapping operation. + * @param[in] mapping The CPU mapping to unmap. + */ + void unmap(cl::CommandQueue &q, uint8_t *mapping); + +protected: + /** Allocate num_elements() * sizeof(type()) of OpenCL memory. */ + void allocate() override; + /** Call map() on the OpenCL buffer. + * + * @return A pointer to the beginning of the LUT's allocation. + */ + uint8_t *lock() override; + /** Call unmap() on the OpenCL buffer. */ + void unlock() override; + +private: + cl::Buffer _buffer; /**< OpenCL buffer containing the LUT data. */ + uint8_t *_mapping; /**< Pointer to the CPU mapping of the OpenCL buffer. */ +}; +} + +#endif /* __ARM_COMPUTE_CLLUTALLOCATOR_H__ */ diff --git a/arm_compute/runtime/CL/CLMultiHOG.h b/arm_compute/runtime/CL/CLMultiHOG.h new file mode 100644 index 0000000000..17bb4e03c1 --- /dev/null +++ b/arm_compute/runtime/CL/CLMultiHOG.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLMULTIHOG_H__ +#define __ARM_COMPUTE_CLMULTIHOG_H__ + +#include "arm_compute/core/CL/ICLMultiHOG.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLHOG.h" + +#include + +namespace arm_compute +{ +/** Basic implementation of the CL multi HOG data-objects */ +class CLMultiHOG : public ICLMultiHOG +{ +public: + /** Constructor + * + * @param[in] num_models Number of HOG data objects to contain + * + */ + CLMultiHOG(size_t num_models); + + // Inherited methods overridden: + size_t num_models() const override; + ICLHOG *cl_model(size_t index) override; + const ICLHOG *cl_model(size_t index) const override; + +private: + size_t _num_models; + std::unique_ptr _model; +}; +} +#endif /*__ARM_COMPUTE_CLMULTIHOG_H__ */ diff --git a/arm_compute/runtime/CL/CLMultiImage.h b/arm_compute/runtime/CL/CLMultiImage.h new file mode 100644 index 0000000000..f70929db07 --- /dev/null +++ b/arm_compute/runtime/CL/CLMultiImage.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLMULTIIMAGE_H__ +#define __ARM_COMPUTE_CLMULTIIMAGE_H__ + +#include "arm_compute/core/CL/ICLMultiImage.h" +#include "arm_compute/core/MultiImageInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLTensor.h" + +#include <array> + +namespace arm_compute +{ +class ICLTensor; +using ICLImage = ICLTensor; + +/** Basic implementation of the CL multi-planar image interface */ +class CLMultiImage : public ICLMultiImage +{ +public: + /** Constructor */ + CLMultiImage(); + /** Init the multi-planar image + * + * @param[in] width Width of the whole image + * @param[in] height Height of the whole image + * @param[in] format Format of the whole image + */ + void init(unsigned int width, unsigned int height, Format format); + /** Init the multi-planar image + * + * @note Uses conservative padding strategy which fits all kernels. + * + * @param[in] width Width of the whole image + * @param[in] height Height of the whole image + * @param[in] format Format of the whole image + */ + void init_auto_padding(unsigned int width, unsigned int height, Format format); + /** Allocate a previously initialised multi image + * + * @note The multi image must not already be allocated when calling this function.
+ * + **/ + void allocate(); + + // Inherited methods overridden: + const MultiImageInfo *info() const override; + CLImage *cl_plane(unsigned int index) override; + const CLImage *cl_plane(unsigned int index) const override; + +private: + /** Init the multi-planar image + * + * @param[in] width Width of the whole image + * @param[in] height Height of the whole image + * @param[in] format Format of the whole image + * @param[in] auto_padding Specifies whether the image uses auto padding + */ + void internal_init(unsigned int width, unsigned int height, Format format, bool auto_padding); + + MultiImageInfo _info; /** Instance of the multi-planar image's meta data */ + std::array _plane; /* Instance CLImage to hold the planar's information */ +}; +} +#endif /*__ARM_COMPUTE_CLMULTIIMAGE_H__ */ diff --git a/arm_compute/runtime/CL/CLPyramid.h b/arm_compute/runtime/CL/CLPyramid.h new file mode 100644 index 0000000000..5e0afb3c63 --- /dev/null +++ b/arm_compute/runtime/CL/CLPyramid.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLPYRAMID_H__ +#define __ARM_COMPUTE_CLPYRAMID_H__ + +#include "arm_compute/core/IPyramid.h" +#include "arm_compute/core/PyramidInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLTensor.h" + +#include +#include + +namespace arm_compute +{ +class CLTensor; + +/** Basic implementation of the OpenCL pyramid interface */ +class CLPyramid : public IPyramid +{ +public: + /** Default constructor */ + CLPyramid(); + /** Initialize pyramid data-object using the given Pyramid's metadata + * + * @param[in] info Pyramid's metadata + */ + void init(const PyramidInfo &info); + + /** Initialize pyramid data-object using the given Pyramid's metadata + * + * @note Uses conservative padding strategy which fits all kernels. + * + * @param[in] info Pyramid's metadata + */ + void init_auto_padding(const PyramidInfo &info); + + /** Allocate the planes in the pyramid + * + * @note The pyramid must not already be allocated when calling this function. 
+ * + **/ + void allocate(); + + // Inherited method overridden + const PyramidInfo *info() const override; + CLTensor *get_pyramid_level(size_t index) const override; + +private: + /** Initialize pyramid data-object using the given Pyramid's metadata + * + * @param[in] info Pyramid's metadata + * @param[in] auto_padding Specifies whether the image in the pyramid use auto padding + */ + void internal_init(const PyramidInfo &info, bool auto_padding); + + PyramidInfo _info; + std::unique_ptr _pyramid; +}; +} +#endif /*__ARM_COMPUTE_CLPYRAMID_H__ */ diff --git a/arm_compute/runtime/CL/CLScheduler.h b/arm_compute/runtime/CL/CLScheduler.h new file mode 100644 index 0000000000..8e80259b59 --- /dev/null +++ b/arm_compute/runtime/CL/CLScheduler.h @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLSCHEDULER_H__ +#define __ARM_COMPUTE_CLSCHEDULER_H__ + +#include "arm_compute/core/CL/CLHelpers.h" +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/CL/CLTypes.h" +#include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ICLKernel; + +/** Provides global access to a CL context and command queue. */ +class CLScheduler +{ +private: + /** Constructor */ + CLScheduler(); + +public: + /** Access the scheduler singleton. + * + * @return The scheduler + */ + static CLScheduler &get(); + /** Initialises the context and command queue used by the scheduler to default values + * and sets a default device and kernel path for the @ref CLKernelLibrary. + */ + void default_init() + { + CLKernelLibrary::get().init("./cl_kernels/", cl::Context::getDefault(), cl::Device::getDefault()); + init(cl::Context::getDefault(), cl::CommandQueue::getDefault(), cl::Device::getDefault()); + } + /** Schedule the execution of the passed kernel if possible. + * + * @param[in] kernel Kernel to execute. + * @param[in] flush (Optional) Specifies if the command queue will be flushed after running the kernel. + */ + void enqueue(ICLKernel &kernel, bool flush = true); + + /** Initialises the context and command queue to be used by the scheduler. + * + * @param[in] context A CL context. + * @param[in] queue A CL command queue. + * @param[in] device A CL device. + */ + void init(cl::Context context = cl::Context::getDefault(), cl::CommandQueue queue = cl::CommandQueue::getDefault(), + cl::Device device = cl::Device::getDefault()) + { + _context = std::move(context); + _queue = std::move(queue); + _target = get_target_from_device(device); + } + + /** Accessor for the associated CL context. + * + * @return A CL context. + */ + cl::Context &context() + { + return _context; + } + + /** Accessor to set the CL context to be used by the scheduler. 
+ * + * @param[in] context A CL context. + */ + void set_context(cl::Context context) + { + _context = std::move(context); + } + + /** Accessor for the associated CL command queue. + * + * @return A CL command queue. + */ + cl::CommandQueue &queue() + { + return _queue; + } + + /** Get the target GPU. + * + * @return The target GPU. + */ + GPUTarget target() const + { + return _target; + } + + /** Accessor to set the CL command queue to be used by the scheduler. + * + * @param[in] queue A CL command queue. + */ + void set_queue(cl::CommandQueue queue) + { + _queue = std::move(queue); + } + + /** Accessor to set target GPU to be used by the scheduler. + * + * @param[in] target The target GPU. + */ + void set_target(GPUTarget target) + { + _target = target; + } + + /** Blocks until all commands in the associated command queue have finished. */ + void sync() + { + _queue.finish(); + } + + /** Enqueues a marker into the associated command queue and return the event. + * + * @return An event that can be waited on to block the executing thread. + */ + cl::Event enqueue_sync_event() + { + cl::Event event; + _queue.enqueueMarker(&event); + + return event; + } + +private: + cl::Context _context; + cl::CommandQueue _queue; + GPUTarget _target; +}; +} +#endif /* __ARM_COMPUTE_CLSCHEDULER_H__ */ diff --git a/arm_compute/runtime/CL/CLSubTensor.h b/arm_compute/runtime/CL/CLSubTensor.h new file mode 100644 index 0000000000..4bab164779 --- /dev/null +++ b/arm_compute/runtime/CL/CLSubTensor.h @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLSUBTENSOR_H__ +#define __ARM_COMPUTE_CLSUBTENSOR_H__ + +#include "arm_compute/core/SubTensorInfo.h" +#include "arm_compute/runtime/CL/CLTensor.h" + +#include + +namespace arm_compute +{ +class ITensorInfo; + +/** Basic implementation of the OpenCL sub-tensor interface */ +class CLSubTensor : public ICLTensor +{ +public: + /** Constructor + * + * @param[in] parent Parent tensor + * @param[in] tensor_shape Shape of the subtensor + * @param[in] coords Coordinates of the first subtensor element inside the parent tensor. 
+ */ + CLSubTensor(ICLTensor *parent, const TensorShape &tensor_shape, const Coordinates &coords); + /** Destructor: free the tensor's memory */ + ~CLSubTensor() = default; + /** Restrict instances of this class to be copy constructed */ + CLSubTensor(const CLSubTensor &) = delete; + /** Restrict instances of this class to be copied */ + CLSubTensor &operator=(const CLSubTensor &) = delete; + /** Allow instances of this class to be move constructed */ + CLSubTensor(CLSubTensor &&) = default; + /** Allow instances of this class to be moved */ + CLSubTensor &operator=(CLSubTensor &&) = default; + + /** Enqueue a map operation of the allocated buffer. + * + * @note Mapping a subtensor will lead to the mapping of the whole parent tensor for now. + * + * @param[in] blocking If true, then the mapping will be ready to use by the time + * this method returns, else it is the caller's responsibility + * to flush the queue and wait for the mapping operation to have completed. + */ + void map(bool blocking = true); + using ICLTensor::map; + /** Enqueue an unmap operation of the allocated and mapped buffer. + * + * @note Unmapping a subtensor will lead to the unmapping of the whole parent tensor for now. + * + * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before + * the memory is accessed by the device. 
+ */ + void unmap(); + using ICLTensor::unmap; + + /** Return the parent tensor of the subtensor + * + * @return Parent tensor + */ + ICLTensor *parent(); + + // Inherited methods overridden: + ITensorInfo *info() const override; + ITensorInfo *info() override; + const cl::Buffer &cl_buffer() const override; + +protected: + // Inherited methods overridden: + uint8_t *do_map(cl::CommandQueue &q, bool blocking) override; + void do_unmap(cl::CommandQueue &q) override; + +private: + ICLTensor *_parent; + mutable SubTensorInfo _info; +}; +} +#endif /*__ARM_COMPUTE_CLSUBTENSOR_H__ */ diff --git a/arm_compute/runtime/CL/CLTensor.h b/arm_compute/runtime/CL/CLTensor.h new file mode 100644 index 0000000000..2c685d1ed1 --- /dev/null +++ b/arm_compute/runtime/CL/CLTensor.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLTENSOR_H__ +#define __ARM_COMPUTE_CLTENSOR_H__ + +#include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/runtime/CL/CLTensorAllocator.h" + +#include + +namespace arm_compute +{ +class ITensorAllocator; +class ITensorInfo; + +/** Basic implementation of the OpenCL tensor interface */ +class CLTensor : public ICLTensor +{ +public: + /** Constructor */ + CLTensor(); + /** Return a pointer to the tensor's allocator + * + * @return A pointer to the tensor's allocator + */ + ITensorAllocator *allocator(); + /** Enqueue a map operation of the allocated buffer. + * + * @param[in] blocking If true, then the mapping will be ready to use by the time + * this method returns, else it is the caller's responsibility + * to flush the queue and wait for the mapping operation to have completed. + */ + void map(bool blocking = true); + using ICLTensor::map; + /** Enqueue an unmap operation of the allocated and mapped buffer. + * + * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before + * the memory is accessed by the device. 
+ */ + void unmap(); + using ICLTensor::unmap; + + // Inherited methods overridden: + TensorInfo *info() const override; + TensorInfo *info() override; + const cl::Buffer &cl_buffer() const override; + +protected: + // Inherited methods overridden: + uint8_t *do_map(cl::CommandQueue &q, bool blocking) override; + void do_unmap(cl::CommandQueue &q) override; + +private: + mutable CLTensorAllocator _allocator; /**< Instance of the OpenCL tensor allocator */ +}; + +using CLImage = CLTensor; +} +#endif /*__ARM_COMPUTE_CLTENSOR_H__ */ diff --git a/arm_compute/runtime/CL/CLTensorAllocator.h b/arm_compute/runtime/CL/CLTensorAllocator.h new file mode 100644 index 0000000000..ed371e0642 --- /dev/null +++ b/arm_compute/runtime/CL/CLTensorAllocator.h @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLTENSORALLOCATOR_H__ +#define __ARM_COMPUTE_CLTENSORALLOCATOR_H__ + +#include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/runtime/ITensorAllocator.h" + +#include + +namespace arm_compute +{ +/** Basic implementation of a CL memory tensor allocator. */ +class CLTensorAllocator : public ITensorAllocator +{ +public: + /** Default constructor. */ + CLTensorAllocator(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + CLTensorAllocator(const CLTensorAllocator &) = delete; + /** Prevent instances of this class from being copy assigned (As this class contains pointers). */ + CLTensorAllocator &operator=(const CLTensorAllocator &) = delete; + /** Allow instances of this class to be moved */ + CLTensorAllocator(CLTensorAllocator &&) = default; + /** Allow instances of this class to be moved */ + CLTensorAllocator &operator=(CLTensorAllocator &&) = default; + /** Default destructor */ + ~CLTensorAllocator() = default; + + /** Interface to be implemented by the child class to return the pointer to the mapped data. */ + uint8_t *data(); + /** Interface to be implemented by the child class to return the pointer to the CL data. */ + const cl::Buffer &cl_data() const; + /** Enqueue a map operation of the allocated buffer on the given queue. + * + * @param[in,out] q The CL command queue to use for the mapping operation. + * @param[in] blocking If true, then the mapping will be ready to use by the time + * this method returns, else it is the caller's responsibility + * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer. + * + * @return The mapping address. + */ + uint8_t *map(cl::CommandQueue &q, bool blocking); + /** Enqueue an unmap operation of the allocated buffer on the given queue. 
+ * + * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before + * the memory is accessed by the device. + * + * @param[in,out] q The CL command queue to use for the mapping operation. + * @param[in] mapping The CPU mapping to unmap. + */ + void unmap(cl::CommandQueue &q, uint8_t *mapping); + + /** Allocate size specified by TensorInfo of OpenCL memory. + * + * @note The tensor must not already be allocated when calling this function. + * + */ + void allocate() override; + + /** Free allocated OpenCL memory. + * + * @note The tensor must have been allocated when calling this function. + * + */ + void free() override; + +protected: + /** Call map() on the OpenCL buffer. + * + * @return A pointer to the beginning of the tensor's allocation. + */ + uint8_t *lock() override; + /** Call unmap() on the OpenCL buffer. */ + void unlock() override; + +private: + cl::Buffer _buffer; /**< OpenCL buffer containing the tensor data. */ + uint8_t *_mapping; /**< Pointer to the CPU mapping of the OpenCL buffer. */ +}; +} +#endif /* __ARM_COMPUTE_CLTENSORALLOCATOR_H__ */ diff --git a/arm_compute/runtime/CL/ICLSimpleFunction.h b/arm_compute/runtime/CL/ICLSimpleFunction.h new file mode 100644 index 0000000000..130c58a98c --- /dev/null +++ b/arm_compute/runtime/CL/ICLSimpleFunction.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_ICLSIMPLEFUNCTION_H__ +#define __ARM_COMPUTE_ICLSIMPLEFUNCTION_H__ + +#include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" +#include "arm_compute/runtime/IFunction.h" + +#include + +namespace arm_compute +{ +/** Basic interface for functions which have a single OpenCL kernel */ +class ICLSimpleFunction : public IFunction +{ +public: + /** Default constructor */ + ICLSimpleFunction(); + + // Inherited methods overridden: + void run() override final; + +protected: + std::unique_ptr _kernel; /**< Kernel to run */ + CLFillBorderKernel _border_handler; /**< Kernel to handle borders */ +}; +} +#endif /*__ARM_COMPUTE_ICLSIMPLEFUNCTION_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLAbsoluteDifference.h b/arm_compute/runtime/CL/functions/CLAbsoluteDifference.h new file mode 100644 index 0000000000..40ee396644 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLAbsoluteDifference.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLABSOLUTEDIFFERENCE_H__ +#define __ARM_COMPUTE_CLABSOLUTEDIFFERENCE_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLAbsoluteDifferenceKernel + * + * @note The tensor data types for the inputs must be U8 or S16. + * @note The function calculates the absolute difference also when the 2 inputs have different tensor data types. + */ +class CLAbsoluteDifference : public ICLSimpleFunction +{ +public: + /** Initialize the function + * + * @param[in] input1 First input tensor. Data types supported: U8, S16 + * @param[in] input2 Second input tensor. Data types supported: U8, S16 + * @param[out] output Output tensor. Data types supported: U8, S16 + */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); +}; +} +#endif /* __ARM_COMPUTE_CLABSOLUTEDIFFERENCE_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLAccumulate.h b/arm_compute/runtime/CL/functions/CLAccumulate.h new file mode 100644 index 0000000000..51f6df9acb --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLAccumulate.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLACCUMULATE_H__ +#define __ARM_COMPUTE_CLACCUMULATE_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLAccumulateKernel */ +class CLAccumulate : public ICLSimpleFunction +{ +public: + /** Set the input and accumulation tensors. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] accum Destination tensor. Data types supported: S16. + */ + void configure(const ICLTensor *input, ICLTensor *accum); +}; + +/** Basic function to run @ref CLAccumulateWeightedKernel */ +class CLAccumulateWeighted : public ICLSimpleFunction +{ +public: + /** Set the input and accumulation tensors, and the scale value. + * + * @param[in] input Source tensor. Data types supported: U8. 
+ * @param[in] alpha The input scalar value with a value in the range of [0, 1.0]. Data types supported: F32. + * @param[in,out] accum Accumulated tensor. Data types supported: U8. + */ + void configure(const ICLTensor *input, float alpha, ICLTensor *accum); +}; + +/** Basic function to run @ref CLAccumulateSquaredKernel */ +class CLAccumulateSquared : public ICLSimpleFunction +{ +public: + /** Set the input and accumulation tensors and the shift value. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[in] shift The input with a value in the range of [0, 15]. Data types supported: U32. + * @param[in,out] accum Accumulated tensor. Data types supported: S16. + */ + void configure(const ICLTensor *input, uint32_t shift, ICLTensor *accum); +}; +} +#endif /*__ARM_COMPUTE_CLACCUMULATE_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLActivationLayer.h b/arm_compute/runtime/CL/functions/CLActivationLayer.h new file mode 100644 index 0000000000..6468c996a2 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLActivationLayer.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLACTIVATIONLAYER_H__ +#define __ARM_COMPUTE_CLACTIVATIONLAYER_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLActivationLayerKernel + * + * @note The function simulates an activation layer with the specified activation function. + */ +class CLActivationLayer : public ICLSimpleFunction +{ +public: + /** Set the input and output tensor. + * + * @param[in] input Source tensor. Data types supported: F16, F32, U16, S16. + * @param[out] output Destination tensor. Data type should match the input data type. + * @param[in] act_info Activation layer parameters. + */ + void configure(const ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info); +}; +} +#endif /* __ARM_COMPUTE_CLACTIVATIONLAYER_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLArithmeticAddition.h b/arm_compute/runtime/CL/functions/CLArithmeticAddition.h new file mode 100644 index 0000000000..feadf39820 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLArithmeticAddition.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLARITHMETICADDITION_H__ +#define __ARM_COMPUTE_CLARITHMETICADDITION_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLArithmeticAdditionKernel + * + * @note The tensor data type for the inputs must be U8, S16, F16, F32. + * @note The function performs an arithmetic addition between two tensors. + */ +class CLArithmeticAddition : public ICLSimpleFunction +{ +public: + /** Initialise the kernel's inputs, output and conversion policy. + * + * @param[in] input1 First tensor input. Data types supported: U8, S16, F16, F32. + * @param[in] input2 Second tensor input. Data types supported: U8, S16, F16, F32. + * @param[out] output Output tensor. 
Data types supported: U8 (Only if both inputs are U8), S16, F16, F32. + * @param[in] policy Policy to use to handle overflow. + */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ConvertPolicy policy); +}; +} +#endif /* __ARM_COMPUTE_CLARITHMETICADDITION_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h b/arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h new file mode 100644 index 0000000000..d7bb21144e --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLARITHMETICSUBTRACTION_H__ +#define __ARM_COMPUTE_CLARITHMETICSUBTRACTION_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLArithmeticSubtractionKernel + * + * @note The tensor data type for the inputs must be U8, S16, F16, F32 + * @note The function performs an arithmetic subtraction between two tensors. + */ +class CLArithmeticSubtraction : public ICLSimpleFunction +{ +public: + /** Initialise the kernel's inputs, output and conversion policy. + * + * @param[in] input1 First tensor input. Data types supported: U8, S16, F16, F32. + * @param[in] input2 Second tensor input. Data types supported: U8, S16, F16, F32. + * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), S16, F16, F32. + * @param[in] policy Policy to use to handle overflow. + */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ConvertPolicy policy); +}; +} +#endif /* __ARM_COMPUTE_CLARITHMETICSUBTRACTION_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h new file mode 100644 index 0000000000..d766d1c69c --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLBATCHNORMALIZATIONLAYER_H__ +#define __ARM_COMPUTE_CLBATCHNORMALIZATIONLAYER_H__ + +#include "arm_compute/runtime/IFunction.h" + +#include "arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLNormalizationLayerKernel and simulate a batch normalization layer. + * + * Batch normalization is calculated by: + * @f[ out_i = \gamma * (\frac{in_i - \mu_{B}}{\sqrt{\sigma^2_{B} + \epsilon}}) + \beta \equiv BN_{\gamma,\beta}(in_i) @f] + * + */ +class CLBatchNormalizationLayer : public IFunction +{ +public: + /** Default constructor */ + CLBatchNormalizationLayer(); + /** Set the input and output tensors. + * + * @param[in] input Source tensor. 
3 lower dimensions represent a single input with dimensions [width, height, FM]. + * The rest are optional and used for representing batches. Data types supported: F32. + * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] gamma Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] beta Beta values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] epsilon Small value to avoid division by zero. + * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input + */ + void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta, const ICLTensor *gamma, float epsilon); + + // Inherited methods overridden: + void run() override; + +private: + CLBatchNormalizationLayerKernel _norm_kernel; /**< BatchNormalization layer kernel to run */ +}; +} +#endif /* __ARM_COMPUTE_CLBATCHNORMALIZATIONLAYER_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLBitwiseAnd.h b/arm_compute/runtime/CL/functions/CLBitwiseAnd.h new file mode 100644 index 0000000000..a4a523baaa --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLBitwiseAnd.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLBITWISEAND_H__ +#define __ARM_COMPUTE_CLBITWISEAND_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLBitwiseAndKernel. + * + * @note The tensor data type for the inputs must be U8. + * @note The function performs a bitwise AND operation using the two input tensors. + */ +class CLBitwiseAnd : public ICLSimpleFunction +{ +public: + /** Initialize the function + * + * @param[in] input1 Input tensor. Data types supported: U8. + * @param[in] input2 Input tensor. Data types supported: U8. + * @param[out] output Output tensor. Data types supported: U8. 
+ */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); +}; +} +#endif /* __ARM_COMPUTE_CLBITWISEAND_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLBitwiseNot.h b/arm_compute/runtime/CL/functions/CLBitwiseNot.h new file mode 100644 index 0000000000..0ff16af870 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLBitwiseNot.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLBITWISENOT_H__ +#define __ARM_COMPUTE_CLBITWISENOT_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLBitwiseNotKernel. + * + * @note The tensor data type for the inputs must be U8. + * @note The function performs a bitwise NOT operation on input tensor. 
+ */ +class CLBitwiseNot : public ICLSimpleFunction +{ +public: + /** Initialize the function + * + * @param[in] input Input tensor. Data types supported: U8. + * @param[out] output Output tensor. Data types supported: U8. + */ + void configure(const ICLTensor *input, ICLTensor *output); +}; +} +#endif /* __ARM_COMPUTE_CLBITWISENOT_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLBitwiseOr.h b/arm_compute/runtime/CL/functions/CLBitwiseOr.h new file mode 100644 index 0000000000..880c4762be --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLBitwiseOr.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLBITWISEOR_H__ +#define __ARM_COMPUTE_CLBITWISEOR_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLBitwiseOrKernel. 
+ * + * @note The tensor data type for the inputs must be U8. + * @note The function performs a bitwise OR operation using the two input tensors. + */ +class CLBitwiseOr : public ICLSimpleFunction +{ +public: + /** Initialize the function + * + * @param[in] input1 Input tensor. Data types supported: U8. + * @param[in] input2 Input tensor. Data types supported: U8. + * @param[out] output Output tensor. Data types supported: U8. + */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); +}; +} +#endif /* __ARM_COMPUTE_CLBITWISEOR_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLBitwiseXor.h b/arm_compute/runtime/CL/functions/CLBitwiseXor.h new file mode 100644 index 0000000000..772dec22ea --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLBitwiseXor.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLBITWISEXOR_H__ +#define __ARM_COMPUTE_CLBITWISEXOR_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLBitwiseXorKernel. + * + * @note The tensor data type for the inputs must be U8. + * @note The function performs a bitwise XOR operation using the two input tensors. + */ +class CLBitwiseXor : public ICLSimpleFunction +{ +public: + /** Initialize the function + * + * @param[in] input1 Input tensor. Data types supported: U8. + * @param[in] input2 Input tensor. Data types supported: U8. + * @param[out] output Output tensor. Data types supported: U8. + */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); +}; +} +#endif /* __ARM_COMPUTE_CLBITWISEXOR_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLBox3x3.h b/arm_compute/runtime/CL/functions/CLBox3x3.h new file mode 100644 index 0000000000..5e51c1a390 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLBox3x3.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLBOX3X3_H__
+#define __ARM_COMPUTE_CLBOX3X3_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to execute box filter 3x3. This function calls the following OpenCL kernels:
+ *
+ * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
+ * -# @ref CLBox3x3Kernel
+ *
+ */
+class CLBox3x3 : public ICLSimpleFunction
+{
+public:
+    /** Initialise the function's source, destinations and border mode.
+     *
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output                Destination tensor, Data types supported: U8.
+     * @param[in]     border_mode           Border mode to use for the convolution.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
+};
+}
+#endif /*__ARM_COMPUTE_CLBOX3X3_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLCannyEdge.h b/arm_compute/runtime/CL/functions/CLCannyEdge.h
new file mode 100644
index 0000000000..e5a82b2263
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLCannyEdge.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLCANNYEDGE_H__
+#define __ARM_COMPUTE_CLCANNYEDGE_H__
+
+#include "arm_compute/runtime/IFunction.h"
+
+#include "arm_compute/core/CL/kernels/CLCannyEdgeKernel.h"
+#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to execute canny edge on OpenCL.
This function calls the following OpenCL kernels and functions:
+ *
+ * -# @ref CLFillBorderKernel (if border_mode == REPLICATE or border_mode == CONSTANT)
+ * -# @ref CLSobel3x3 (if gradient_size == 3) or @ref CLSobel5x5 (if gradient_size == 5) or @ref CLSobel7x7 (if gradient_size == 7)
+ * -# @ref CLGradientKernel
+ * -# @ref CLEdgeNonMaxSuppressionKernel
+ * -# @ref CLEdgeTraceKernel
+ *
+ */
+class CLCannyEdge : public IFunction
+{
+public:
+    /** Constructor */
+    CLCannyEdge();
+    /** Initialise the function's source, destination, thresholds, gradient size, normalization type and border mode.
+     *
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for border_mode != UNDEFINED)
+     * @param[out]    output                Destination tensor. Data types supported: U8.
+     * @param[in]     upper_thr             Upper threshold used for the hysteresis.
+     * @param[in]     lower_thr             Lower threshold used for the hysteresis.
+     * @param[in]     gradient_size         Gradient size (3, 5 or 7).
+     * @param[in]     norm_type             Normalization type. if 1, L1-Norm otherwise L2-Norm.
+     * @param[in]     border_mode           Border mode to use for the convolution.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr, int32_t gradient_size, int32_t norm_type,
+                   BorderMode border_mode, uint8_t constant_border_value = 0);
+
+    // Inherited methods overridden:
+    virtual void run() override;
+
+private:
+    std::unique_ptr<IFunction>    _sobel;               /**< Pointer to Sobel kernel. */
+    CLGradientKernel              _gradient;            /**< Gradient kernel. */
+    CLFillBorderKernel            _border_mag_gradient; /**< Fill border on magnitude tensor kernel */
+    CLEdgeNonMaxSuppressionKernel _non_max_suppr;       /**< Non-Maxima suppression kernel. */
+    CLEdgeTraceKernel             _edge_trace;          /**< Edge tracing kernel. */
+    CLImage                       _gx;                  /**< Source tensor - Gx component. */
+    CLImage                       _gy;                  /**< Source tensor - Gy component.
*/ + CLImage _mag; /**< Source tensor - Magnitude. */ + CLImage _phase; /**< Source tensor - Phase. */ + CLImage _nonmax; /**< Source tensor - Non-Maxima suppressed. */ + CLImage _visited, _recorded, _l1_list_counter, _l1_stack; /**< Temporary tensors */ +}; +} + +#endif /* __ARM_COMPUTE_CLCANNYEDGE_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLChannelCombine.h b/arm_compute/runtime/CL/functions/CLChannelCombine.h new file mode 100644 index 0000000000..337e6b4820 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLChannelCombine.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLCHANNELCOMBINE_H__ +#define __ARM_COMPUTE_CLCHANNELCOMBINE_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLMultiImage; +class ICLTensor; +using ICLImage = ICLTensor; + +/** Basic function to run @ref CLChannelCombineKernel to perform channel combination. */ +class CLChannelCombine : public ICLSimpleFunction +{ +public: + /** Initialize function's inputs and outputs. + * + * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format. + * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format. + * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format. + * @param[in] plane3 The 2D plane that forms channel 3. Must be of U8 format. + * @param[out] output The single planar output tensor. + */ + void configure(const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output); + /** Initialize function's inputs and outputs. + * + * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format. + * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format. + * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format. + * @param[out] output The multi planar output image. + */ + void configure(const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output); +}; +} +#endif /*__ARM_COMPUTE_CLCHANNELCOMBINE_H__*/ diff --git a/arm_compute/runtime/CL/functions/CLChannelExtract.h b/arm_compute/runtime/CL/functions/CLChannelExtract.h new file mode 100644 index 0000000000..1753374622 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLChannelExtract.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLCHANNELEXTRACT_H__ +#define __ARM_COMPUTE_CLCHANNELEXTRACT_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLMultiImage; +class ICLTensor; +using ICLImage = ICLTensor; + +/** Basic function to run @ref CLChannelExtractKernel to perform channel extraction. */ +class CLChannelExtract : public ICLSimpleFunction +{ +public: + /** Initialize the function's source, destination + * + * @param[in] input The input tensor to extract the channel from. Formats supported: Any single planar. + * @param[in] channel The channel to extract. + * @param[out] output The extracted channel. Must be of U8 format. 
+ */ + void configure(const ICLTensor *input, Channel channel, ICLTensor *output); + /** Initialize the function's source, destination + * + * @param[in] input The multi-planar input image to extract channel from. + * @param[in] channel The channel to extract. + * @param[out] output The extracted 2D channel. Must be of U8 format. + */ + void configure(const ICLMultiImage *input, Channel channel, ICLImage *output); +}; +} +#endif /*__ARM_COMPUTE_CLCHANNELEXTRACT_H__*/ diff --git a/arm_compute/runtime/CL/functions/CLColorConvert.h b/arm_compute/runtime/CL/functions/CLColorConvert.h new file mode 100644 index 0000000000..12457a0cf2 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLColorConvert.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLCOLORCONVERT_H__ +#define __ARM_COMPUTE_CLCOLORCONVERT_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLMultiImage; +class ICLTensor; +using ICLImage = ICLTensor; + +/** Basic function to run @ref CLColorConvertKernel + * + * @note The function performs color convert between images. + */ +class CLColorConvert : public ICLSimpleFunction +{ +public: + /** Initialize the function's source, destination + * + * @param[in] input The input single-planar tensor from which to convert + * @param[in] output The converted single-planar output tensor + */ + void configure(const ICLTensor *input, ICLTensor *output); + /** Initialize the function's source, destination + * + * @param[in] input The multi-planar input image from which to convert + * @param[in] output The converted single-planar output image + */ + void configure(const ICLMultiImage *input, ICLImage *output); + /** Initialize the function's source, destination + * + * @param[in] input The single-planar input image from which to convert + * @param[in] output The converted multi-planar output image + */ + void configure(const ICLImage *input, ICLMultiImage *output); + /** Initialize the function's source, destination + * + * @param[in] input The multi-planar input image from which to convert + * @param[in] output The converted multi-planar output image + */ + void configure(const ICLMultiImage *input, ICLMultiImage *output); +}; +} +#endif /* __ARM_COMPUTE_CLCOLORCONVERT_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLConvolution.h b/arm_compute/runtime/CL/functions/CLConvolution.h new file mode 100644 index 0000000000..f526f6ff4a --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLConvolution.h @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLCONVOLUTION_H__ +#define __ARM_COMPUTE_CLCONVOLUTION_H__ + +#include "arm_compute/core/CL/kernels/CLConvolutionKernel.h" +#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" +#include "arm_compute/runtime/IFunction.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to execute convolution of size 3x3. This function calls the following OpenCL kernels: + * + * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref CLConvolution3x3Kernel + * + */ +class CLConvolution3x3 : public ICLSimpleFunction +{ +public: + /** Initialize the function's source, destination, conv and border_mode. 
+ * + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor, Data types supported: U8 or S16. + * @param[in] conv matrix_size x matrix_size S16 coefficients structured as a row-major 2D array in a linear buffer. + * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0); +}; + +/** Basic function to execute square convolution.Currently it supports 5x5, 7x7, 9x9. This function calls the following OpenCL kernels: + * + * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref CLConvolutionKernel or
+ * @ref CLSeparableConvolutionHorKernel and @ref CLSeparableConvolutionVertKernel (if convolution matrix is separable)
+ *
+ */
+template <unsigned int matrix_size>
+class CLConvolutionSquare : public IFunction
+{
+public:
+    /** Default constructor */
+    CLConvolutionSquare();
+    /** Initialize the function's source, destination, conv and border_mode.
+     *
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output                Destination tensor, Data types supported: U8 or S16.
+     * @param[in]     conv                  matrix_size x matrix_size S16 coefficients structured as a row-major 2D array in a linear buffer.
+     * @param[in]     scale                 Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
+     * @param[in]     border_mode           Strategy to use for borders.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0);
+
+    // Inherited methods overriden:
+    void run() override;
+
+private:
+    CLTensor                                     _tmp;            /**< temporary buffer for output of horizontal pass */
+    bool                                         _is_separable;   /**< true if the convolution can be separated */
+    CLSeparableConvolutionHorKernel<matrix_size> _kernel_hor;     /**< kernel for horizontal pass of separated convolution */
+    CLSeparableConvolutionVertKernel<matrix_size> _kernel_vert;   /**< kernel for vertical pass of separated convolution */
+    CLConvolutionKernel<matrix_size>             _kernel;         /**< kernel for non-separated convolution **/
+    CLFillBorderKernel                           _border_handler; /**< kernel for border handling */
+};
+
+/** Basic function to run 5x5 convolution. */
+using CLConvolution5x5 = CLConvolutionSquare<5>;
+/** Basic function to run 7x7 convolution. */
+using CLConvolution7x7 = CLConvolutionSquare<7>;
+/** Basic function to run 9x9 convolution.
*/ +using CLConvolution9x9 = CLConvolutionSquare<9>; + +/** Basic function to execute non-square convolution. This function calls the following CL kernels: + * + * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref CLConvolutionRectangleKernel or
+ * + * @note Convolution rectangle should have dimensions of 3, 5, 7, 9 + */ +class CLConvolutionRectangle : public ICLSimpleFunction +{ +public: + /** Initialize the function's source, destination, conv and border_mode. + * + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor, Data types supported: U8 or S16. + * @param[in] conv Matrix_size x matrix_size S16 coefficients structured as a row-major 2D array in a linear buffer. + * @param[in] rows Rows of convolution kernel. + * @param[in] cols Columns of convolution kernel. + * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t rows, uint32_t cols, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0); +}; +} +#endif /*__ARM_COMPUTE_CLCONVOLUTION_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h new file mode 100644 index 0000000000..6a40396f9a --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLCONVOLUTIONLAYER_H__ +#define __ARM_COMPUTE_CLCONVOLUTIONLAYER_H__ + +#include "arm_compute/runtime/IFunction.h" + +#include "arm_compute/core/CL/kernels/CLCol2ImKernel.h" +#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" +#include "arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h" +#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h" +#include "arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h" +#include "arm_compute/core/CL/kernels/CLIm2ColKernel.h" +#include "arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/CLTensor.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Function to reshape and transpose the weights. 
This function calls the following kernels: + * -# @ref CLWeightsReshapeKernel + * -# @ref CLGEMMTranspose1xWKernel + */ +class CLConvolutionLayerReshapeWeights : public IFunction +{ +public: + /** Constructor */ + CLConvolutionLayerReshapeWeights(); + /** Set the input and output tensors. + * + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: F32. + * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights. + * @param[out] output Destination tensor. Data types supported: Same as @p weights. + * @param[in] transpose1xW True if the weights are to undergo a 1xW transposition after reshaping (in case of GEMM operation), false otherwise. + * Data types supported: Same as @p weights. + */ + void configure(const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, bool transpose1xW); + // Inherited methods overridden: + void run() override; + +private: + CLConvolutionLayerWeightsReshapeKernel _weights_reshape_kernel; + CLGEMMTranspose1xWKernel _weights_transposed_kernel; + CLTensor _weights_reshaped; + bool _transpose1xW; +}; + +/** Basic function to compute the convolution layer. This function calls the following OpenCL kernels: + * + * -# @ref CLConvolutionLayerWeightsReshapeKernel (executed only once for each configuration) + * -# @ref CLGEMMTranspose1xWKernel (executed only once for each configuration) + * -# @ref CLIm2ColKernel + * -# @ref CLGEMMInterleave4x4Kernel + * -# @ref CLGEMMMatrixMultiplyKernel + * -# @ref CLCol2ImKernel + */ +class CLConvolutionLayer : public IFunction +{ +public: + /** Default constructor */ + CLConvolutionLayer(); + /** Set the input and output tensors. + * + * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. 
+ * Data types supported: F16, F32. + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input. + * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported:Same as @p input. + * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. + * Data types supported: Same as @p input. + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights + * tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input. + */ + void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo()); + + // Inherited methods overridden: + void run() override; + +private: + CLConvolutionLayerReshapeWeights _reshape_weights; + CLIm2ColKernel _input_im2col_kernel; + CLGEMMInterleave4x4Kernel _input_interleave_kernel; + CLGEMMMatrixMultiplyKernel _mm_kernel; + CLCol2ImKernel _output_col2im_kernel; + CLTensor _input_im2col_reshaped; + CLTensor _input_interleaved_reshaped; + CLTensor _weights_reshaped; + CLTensor _weights_transposed; + CLTensor _gemm_output; + bool _has_bias; + bool _is_fully_connected_convolution; + bool _are_weights_reshaped; +}; +} +#endif /* __ARM_COMPUTE_CLCONVOLUTIONLAYER_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLDepthConcatenate.h b/arm_compute/runtime/CL/functions/CLDepthConcatenate.h new file mode 100644 index 0000000000..3199936b82 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLDepthConcatenate.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2017 ARM 
Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLDEPTHCONCATENATE_H__
+#define __ARM_COMPUTE_CLDEPTHCONCATENATE_H__
+
+#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Window.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
+#include <vector>
+
+namespace arm_compute
+{
+class ICLTensor;
+class CLDepthConcatenateKernel;
+class CLFillBorderKernel;
+
+/** Basic function to execute concatenate tensors along z axis. This function calls the following kernels:
+ *
+ * -# @ref CLFillBorderKernel (executed if input's lowest two dimensions are smaller than respective output's dimensions)
+ * -# @ref CLDepthConcatenateKernel
+ *
+ */
+class CLDepthConcatenate : public IFunction
+{
+public:
+    /** Default constructor */
+    CLDepthConcatenate();
+    /** Initialise the kernel's inputs vector and output.
+ *
+ * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: F32.
+ * @param[out]    output        Output tensor. Data types supported: F32.
+ */
+    void configure(std::vector<ICLTensor *> inputs_vector, ICLTensor *output);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    std::vector<ICLTensor *>                    _inputs_vector;
+    std::unique_ptr<CLDepthConcatenateKernel[]> _concat_kernels_vector;
+    std::unique_ptr<CLFillBorderKernel[]>       _border_handlers_vector;
+    unsigned int                                _num_inputs;
+};
+}
+#endif /* __ARM_COMPUTE_CLDEPTHCONCATENATE_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLDepthConvert.h b/arm_compute/runtime/CL/functions/CLDepthConvert.h
new file mode 100644
index 0000000000..f11027656d
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLDepthConvert.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */ +#ifndef __ARM_COMPUTE_CLDEPTHCONVERT_H__ +#define __ARM_COMPUTE_CLDEPTHCONVERT_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLDepthConvertKernel. */ +class CLDepthConvert : public ICLSimpleFunction +{ +public: + /** Initialize the function's source, destination + * + * Input data type must be different than output data type. + * + * Valid conversions Input -> Output : + * + * - U8 -> U16, S16, U32, S32 + * - U16 -> U8, U32, S32 + * - S16 -> U8, U32, S32 + * - U32 -> U8, U16, S16 + * - S32 -> U8, U16, S16 + * + * @param[in] input The input tensor to convert. Data types supported: U8, U16, S16, U32 or S32. + * @param[out] output The output tensor. Data types supported: U8, U16, S16, U32 or S32. + * @param[in] policy Conversion policy. + * @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8. + */ + void configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift); +}; +} +#endif /*__ARM_COMPUTE_CLDEPTHCONVERT_H__*/ diff --git a/arm_compute/runtime/CL/functions/CLDerivative.h b/arm_compute/runtime/CL/functions/CLDerivative.h new file mode 100644 index 0000000000..05033e8172 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLDerivative.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLDERIVATIVE_H__ +#define __ARM_COMPUTE_CLDERIVATIVE_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to execute first order derivative operator. This function calls the following CL kernels: + * + * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref CLDerivativeKernel + * + */ +class CLDerivative : public ICLSimpleFunction +{ +public: + /** Initialise the function's source, destinations and border mode. + * + * @note At least one of output_x or output_y must be not NULL. + * + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output_x (optional) Destination tensor. 
Derivative along the X direction. Data types supported: S16. + * @param[out] output_y (optional) Destination tensor. Derivative along the Y direction. Data types supported: S16. + * @param[in] border_mode Border mode to use + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + * + */ + void configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0); +}; +} +#endif /* __ARM_COMPUTE_CLDERIVATIVE_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLDilate.h b/arm_compute/runtime/CL/functions/CLDilate.h new file mode 100644 index 0000000000..8534139c86 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLDilate.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLDILATE_H__ +#define __ARM_COMPUTE_CLDILATE_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to execute dilate. This function calls the following OpenCL kernels: +* +* -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) +* -# @ref CLDilateKernel +* +*/ +class CLDilate : public ICLSimpleFunction +{ +public: + /** Initialise the kernel's inputs, output and border mode. + * + * @param[in,out] input First tensor input. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Output tensor. Data types supported: U8. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value); +}; +} +#endif /*__ARM_COMPUTE_CLDILATE_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLEqualizeHistogram.h b/arm_compute/runtime/CL/functions/CLEqualizeHistogram.h new file mode 100644 index 0000000000..d7182756b5 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLEqualizeHistogram.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLEQUALIZEHISTOGRAM_H__ +#define __ARM_COMPUTE_CLEQUALIZEHISTOGRAM_H__ + +#include "arm_compute/core/CL/kernels/CLHistogramKernel.h" +#include "arm_compute/core/CL/kernels/CLTableLookupKernel.h" +#include "arm_compute/runtime/CL/CLDistribution1D.h" +#include "arm_compute/runtime/CL/CLLut.h" +#include "arm_compute/runtime/IFunction.h" + +#include + +namespace arm_compute +{ +class ICLTensor; +using ICLImage = ICLTensor; + +/** Basic function to execute histogram equalization. This function calls the following CL kernels: + * + * -# @ref CLHistogramKernel + * -# @ref CLTableLookupKernel + * + */ +class CLEqualizeHistogram : public IFunction +{ +public: + /** Default Constructor. */ + CLEqualizeHistogram(); + /** Initialise the kernel's inputs. + * + * @param[in] input Input image. Data types supported: U8. 
+ * @param[out] output Output of same data type with equalized brightness and contrast. + */ + void configure(const ICLImage *input, ICLImage *output); + + // Inherited methods overridden: + void run() override; + +private: + CLHistogramKernel _histogram_kernel; /**< Kernel that calculates the histogram of input. */ + CLHistogramBorderKernel _border_histogram_kernel; /**< Kernel that calculates the histogram on the borders. */ + CLTableLookupKernel _map_histogram_kernel; /**< Kernel that maps the input to output using the lut. */ + CLDistribution1D _hist; /**< Distribution that holds the histogram of the input image. */ + CLDistribution1D _cum_dist; /**< Distribution that holds the cumulative distribution of the input histogram. */ + CLLut _cd_lut; /**< Holds the equalization lookup table. */ + static const uint32_t max_range = 256; /**< Histogram range of the internal histograms. */ + static const uint32_t nr_bins = 256; /**< Histogram bins of the internal histograms. */ +}; +} +#endif /*__ARM_COMPUTE_CLEQUALIZEHISTOGRAM_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLErode.h b/arm_compute/runtime/CL/functions/CLErode.h new file mode 100644 index 0000000000..cd2f5516e2 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLErode.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLERODE_H__ +#define __ARM_COMPUTE_CLERODE_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to execute erode. This function calls the following OpenCL kernels: +* +* -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) +* -# @ref CLErodeKernel +* +*/ +class CLErode : public ICLSimpleFunction +{ +public: + /** Initialise the kernel's inputs, output and border mode + * + * @param[in,out] input First tensor input. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Output tensor. Data types supported: U8. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value); +}; +} +#endif /*__ARM_COMPUTE_CLERODE_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLFastCorners.h b/arm_compute/runtime/CL/functions/CLFastCorners.h new file mode 100644 index 0000000000..79d82af462 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLFastCorners.h @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLFASTCORNERS_H__ +#define __ARM_COMPUTE_CLFASTCORNERS_H__ + +#include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/core/CL/kernels/CLFastCornersKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Window.h" +#include "arm_compute/runtime/CL/CLArray.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h" +#include "arm_compute/runtime/IFunction.h" + +#include + +namespace arm_compute +{ +class ICLTensor; +using ICLImage = ICLTensor; + +/** Basic function to execute fast corners. 
This function calls the following CL kernels: + * + * -# @ref CLFastCornersKernel + * -# @ref CLNonMaximaSuppression3x3Kernel (executed if nonmax_suppression == true) + * -# @ref CLCopyToArrayKernel + * + */ +class CLFastCorners : public IFunction +{ +public: + /** Constructor */ + CLFastCorners(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLFastCorners(const CLFastCorners &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + const CLFastCorners &operator=(const CLFastCorners &) = delete; + /** Initialize the function's source, destination, conv and border_mode. + * + * @param[in] input Source image. Data types supported: U8. + * @param[in] threshold Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3. + * @param[in] nonmax_suppression If true, non-maximum suppression is applied to detected corners before being placed in the array. + * @param[out] corners Array of keypoints to store the results. + * @param[in,out] num_corners Record number of corners in the array + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
+ */ + void configure(const ICLImage *input, float threshold, bool nonmax_suppression, CLKeyPointArray *corners, unsigned int *num_corners, + BorderMode border_mode, uint8_t constant_border_value = 0); + // Inherited methods overridden: + void run() override; + +private: + CLFastCornersKernel _fast_corners_kernel; + CLNonMaximaSuppression3x3 _suppr_func; + CLCopyToArrayKernel _copy_array_kernel; + CLImage _output; + CLImage _suppr; + Window _win; + bool _non_max; + unsigned int *_num_corners; + cl::Buffer _num_buffer; + CLKeyPointArray *_corners; + uint8_t _constant_border_value; +}; +} +#endif /*__ARM_COMPUTE_CLFASTCORNERS_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLFillBorder.h b/arm_compute/runtime/CL/functions/CLFillBorder.h new file mode 100644 index 0000000000..b4855475c3 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLFillBorder.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLFILLBORDER_H__ +#define __ARM_COMPUTE_CLFILLBORDER_H__ + +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLFillBorderKernel */ +class CLFillBorder : public ICLSimpleFunction +{ +public: + /** Initialize the function + * + * @param[in,out] tensor Source tensor. Data types supported: U8, S16 + * @param[in] border_width The border width + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ICLTensor *tensor, unsigned int border_width, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue()); +}; +} +#endif /*__ARM_COMPUTE_CLFILLBORDER_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h new file mode 100644 index 0000000000..826f445bd8 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLFULLYCONNECTEDLAYER_H__ +#define __ARM_COMPUTE_CLFULLYCONNECTEDLAYER_H__ + +#include "arm_compute/runtime/IFunction.h" + +#include "arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h" +#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h" +#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h" +#include "arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h" +#include "arm_compute/core/CL/kernels/CLIm2ColKernel.h" +#include "arm_compute/core/CL/kernels/CLTransposeKernel.h" +#include "arm_compute/runtime/CL/CLTensor.h" + +namespace arm_compute +{ +/** Basic function to reshape the weights of Fully Connected layer with OpenCL. 
This function calls the following kernels: + * + * -# @ref CLTransposeKernel (if @p transpose_weights is set to true) + * -# @ref CLGEMMTranspose1xWKernel (if @p is_batched_fc_layer is set to true) + * + * @note The fully connected layer accepts "weights" tensors only with 2 dimensions. + */ +class CLFullyConnectedLayerReshapeWeights : public IFunction +{ +public: + /** Constructor */ + CLFullyConnectedLayerReshapeWeights(); + /** Set the input and output tensors. + * + * @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported: QS8/F32. + * @param[out] output Destination tensor. Data type supported: Same as @p input. + * @param[in] transpose_weights True if the weights must be transposed. Data types supported: Same as @p weights. + * @param[in] is_batched_fc_layer True if it is a batched fully connected layer + */ + void configure(const ICLTensor *input, ICLTensor *output, bool transpose_weights, bool is_batched_fc_layer); + + // Inherited methods overridden: + void run() override; + +private: + CLTransposeKernel _transpose_kernel; + CLGEMMTranspose1xWKernel _transpose1xW_kernel; + CLTensor _transpose_output; + bool _transpose_weights; + bool _is_batched_fc_layer; +}; + +/** Basic function to compute a Fully Connected layer on OpenCL. This function calls the following OpenCL kernels: + * + * -# @ref CLIm2ColKernel (called when the input comes from a convolutional layer) + * -# @ref CLFullyConnectedLayerReshapeWeights (if @p are_weights_reshaped is set to false) (called once) + * -# @ref CLGEMMInterleave4x4Kernel (called if we have a multi-batch input) + * -# @ref CLGEMMMatrixMultiplyKernel + * -# @ref CLGEMMMatrixAccumulateBiasesKernel (if @p biases is not equal to nullptr) + * + * @note The fully connected layer accepts "weights" tensors only with 2 dimensions. + */ +class CLFullyConnectedLayer : public IFunction +{ +public: + /** Constructor */ + CLFullyConnectedLayer(); + /** Set the input and output tensors. 
+ * + * @param[in] input Source tensor. Data type supported: F16/F32. + * @param[in] weights Weights tensor. The weights must be 2 dimensional. Data type supported: Same as @p input + * @param[in] biases Bias tensor. It can be nullptr. Data type supported:Same as @p input. + * @param[out] output Destination tensor. Data type supported: Same as @p input. + * @param[in] transpose_weights (Optional) Transpose weights if true. Defaults to true. + * @param[in] are_weights_reshaped (Optional) Reshape the weights tensor if false. Defaults to false. + */ + void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, bool transpose_weights = true, bool are_weights_reshaped = false); + + //Inherited methods override + void run() override; + +private: + void configure_fc_fc_wb(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output); + void configure_fc_fc_nb(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output); + void configure_conv_fc_wb(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output); + void configure_conv_fc_nb(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output); + + CLIm2ColKernel _im2col_kernel; + CLFullyConnectedLayerReshapeWeights _reshape_weights_kernel; + CLGEMMInterleave4x4Kernel _interleave4x4_kernel; + CLGEMMMatrixMultiplyKernel _mm_kernel; + CLGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel; + CLTensor _im2col_output; + CLTensor _interleave4x4_output; + CLTensor _reshape_weights_output; + bool _are_weights_reshaped; + bool _is_fc_after_conv; + bool _is_batched_fc_layer; + bool _accumulate_biases; +}; +} +#endif /* __ARM_COMPUTE_CLFULLYCONNECTEDLAYER_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLGEMM.h b/arm_compute/runtime/CL/functions/CLGEMM.h new file mode 100644 index 0000000000..043b2b8115 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLGEMM.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLGEMM_H__ +#define __ARM_COMPUTE_CLGEMM_H__ + +#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" +#include "arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h" +#include "arm_compute/core/CL/kernels/CLGEMMMatrixAdditionKernel.h" +#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h" +#include "arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/IFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to execute GEMM on OpenCL. Data types supported: F32, F16. 
This function calls the following OpenCL kernels: + * + * -# @ref CLGEMMInterleave4x4Kernel (if the output tensor is a matrix) + * -# @ref CLGEMMTranspose1xWKernel (if the output tensor is a matrix) + * -# @ref CLGEMMMatrixMultiplyKernel + * -# @ref CLGEMMMatrixAdditionKernel (if c != nullptr and beta != 0.0) + * + */ +class CLGEMM : public IFunction +{ +public: + /** Default constructor. */ + CLGEMM(); + /** Initialise the kernel's inputs and output + * + * @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C]. + * + * @note All tensors must have the same data type. Data types supported: F32, F16 + * + * @note Whilst the first input tensor can be a vector, the second input tensor must be at least a matrix + * + * @param[in] a First input tensor (Matrix or Vector A). Data types supported: F32, F16 + * @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a. + * @param[in] c Third input tensor (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a. + * @param[out] output Output tensor. 
Data type supported: same as @p a + * @param[in] alpha Weight of the matrix product + * @param[in] beta Weight of matrix C + */ + void configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta); + + // Inherited methods overridden: + void run() override; + +private: + CLGEMMInterleave4x4Kernel _interleave_kernel; + CLGEMMTranspose1xWKernel _transpose_kernel; + CLGEMMMatrixMultiplyKernel _mm_kernel; + CLGEMMMatrixAdditionKernel _ma_kernel; + CLTensor _tmp_a; + CLTensor _tmp_b; + bool _run_vector_matrix_multiplication; + bool _run_addition; +}; +} + +#endif /* __ARM_COMPUTE_CLGEMM_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLGEMMInterleave4x4.h b/arm_compute/runtime/CL/functions/CLGEMMInterleave4x4.h new file mode 100644 index 0000000000..b80136b328 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLGEMMInterleave4x4.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLGEMMINTERLEAVE4X4_H__ +#define __ARM_COMPUTE_CLGEMMINTERLEAVE4X4_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to execute CLGEMMInterleave4x4Kernel. This function calls the following OpenCL kernel: + * + * -# @ref CLGEMMInterleave4x4Kernel + * + */ +class CLGEMMInterleave4x4 : public ICLSimpleFunction +{ +public: + /** Initialise the kernel's inputs, output + * + * @param[in] input First input tensor. Data types supported: U8/S8/U16/S16/F16/U32/S32/F32 + * @param[out] output Output tensor. Data type supported: same as @p input + */ + void configure(const ICLTensor *input, ICLTensor *output); +}; +} + +#endif /* __ARM_COMPUTE_CLGEMMINTERLEAVE4X4_H__ */ \ No newline at end of file diff --git a/arm_compute/runtime/CL/functions/CLGEMMLowp.h b/arm_compute/runtime/CL/functions/CLGEMMLowp.h new file mode 100644 index 0000000000..da8883c3f8 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLGEMMLowp.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLGEMMLOWP_H__ +#define __ARM_COMPUTE_CLGEMMLOWP_H__ + +#include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/IFunction.h" + +#include "arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h" +#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h" +#include "arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to execute GEMMLowp on OpenCL. This function calls the following OpenCL kernels: +* +* -# @ref CLGEMMInterleave4x4Kernel +* -# @ref CLGEMMTranspose1xWKernel +* -# @ref CLGEMMLowpMatrixMultiplyKernel +* +*/ +class CLGEMMLowp : public IFunction +{ +public: + /** Constructor */ + CLGEMMLowp(); + /** Initialise the kernel's inputs, output + * + * @note GEMM_LOWP: low precision matrix multiply kernel + * This kernel performs the following computation: + * + * -# Convert a values from uint8 to int32 and add a_offset to each of them. + * -# Convert b values from uint8 to int32 and add b_offset to each of them. + * -# Compute the int32 matrix product of the resulting a * b. + * -# Add output_offset to each entry of the result. + * -# Multiply each entry of the result and round to the nearest integer + * -# Clamp the resulting int32 values to the [0..255] range and cast to uint8. + * + * @param[in] a First input tensor (Matrix A). Data types supported: U8. + * @param[in] b Second input tensor (Matrix B). 
Data types supported: same as @p a. + * @param[out] output Output tensor. Data types supported: same as @p a. + * @param[in] a_offset Offset to be added to each element of the matrix A. + * @param[in] b_offset Offset to be added to each element of the matrix B. + * @param[in] output_offset Offset to be added to each element of the output matrix + * @param[in] output_mult_int Multiplied with each element of the output matrix + * @param[in] shift Number of bits to shift right the result. + */ + void configure(const ICLTensor *a, const ICLTensor *b, ICLTensor *output, int32_t a_offset, int32_t b_offset, int32_t output_offset, int32_t output_mult_int, int32_t shift); + + // Inherited methods overridden: + void run() override; + +private: + CLGEMMInterleave4x4Kernel _interleave_kernel; + CLGEMMTranspose1xWKernel _transpose_kernel; + CLGEMMLowpMatrixMultiplyKernel _mm_kernel; + CLTensor _tmp_a; + CLTensor _tmp_b; +}; +} +#endif /*__ARM_COMPUTE_CLGEMMLOWP_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLGaussian3x3.h b/arm_compute/runtime/CL/functions/CLGaussian3x3.h new file mode 100644 index 0000000000..f8223bc5f5 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLGaussian3x3.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLGAUSSIAN3X3_H__ +#define __ARM_COMPUTE_CLGAUSSIAN3X3_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to execute gaussian filter 3x3. This function calls the following OpenCL kernels: + * + * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref CLGaussian3x3Kernel + * + */ +class CLGaussian3x3 : public ICLSimpleFunction +{ +public: + /** Initialise the function's source, destinations and border mode. + * + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor, Data types supported: U8. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0); +}; +} +#endif /*__ARM_COMPUTE_CLGAUSSIAN3X3_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLGaussian5x5.h b/arm_compute/runtime/CL/functions/CLGaussian5x5.h new file mode 100644 index 0000000000..148b9a9924 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLGaussian5x5.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLGAUSSIAN5X5_H__ +#define __ARM_COMPUTE_CLGAUSSIAN5X5_H__ + +#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" +#include "arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/IFunction.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to execute gaussian filter 5x5. This function calls the following OpenCL kernels: + * + * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref CLGaussian5x5HorKernel + * -# @ref CLGaussian5x5VertKernel + * + */ +class CLGaussian5x5 : public IFunction +{ +public: + /** Default Constructor. */ + CLGaussian5x5(); + /** Initialise the function's source, destinations and border mode. 
+ * + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor, Data types supported: U8. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0); + + // Inherited methods overridden: + void run() override; + +protected: + CLGaussian5x5HorKernel _kernel_hor; /**< Horizontal pass kernel */ + CLGaussian5x5VertKernel _kernel_vert; /**< Vertical pass kernel */ + CLFillBorderKernel _border_handler; /**< Kernel to handle image borders */ + CLImage _tmp; /**< Temporary buffer */ +}; +} +#endif /*__ARM_COMPUTE_CLGAUSSIAN5X5_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLGaussianPyramid.h b/arm_compute/runtime/CL/functions/CLGaussianPyramid.h new file mode 100644 index 0000000000..97935193dc --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLGaussianPyramid.h @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLGAUSSIANPYRAMID_H__ +#define __ARM_COMPUTE_CLGAUSSIANPYRAMID_H__ + +#include "arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h" + +#include "arm_compute/core/CL/kernels/CLScaleKernel.h" +#include "arm_compute/core/IPyramid.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLPyramid.h" +#include "arm_compute/runtime/CL/functions/CLGaussian5x5.h" +#include "arm_compute/runtime/IFunction.h" + +#include +#include + +namespace arm_compute +{ +class ICLTensor; + +/** Common interface for all Gaussian pyramid functions + */ +class CLGaussianPyramid : public IFunction +{ +public: + /** Constructor */ + CLGaussianPyramid(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGaussianPyramid(const CLGaussianPyramid &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGaussianPyramid &operator=(const CLGaussianPyramid &) = delete; + /** Allow instances of this class to be moved */ + CLGaussianPyramid(CLGaussianPyramid &&) = default; + /** Allow instances of this class to be moved */ + CLGaussianPyramid &operator=(CLGaussianPyramid &&) = default; + /** Default destructor */ + virtual ~CLGaussianPyramid() = default; + /** Initialise the function's source, destinations and border mode. + * + * @param[in, out] input Source tensor. Data types supported: U8. 
(Written to only for @p border_mode != UNDEFINED) + * @param[out] pyramid Destination pyramid tensors, Data types supported at each level: U8. + * @param[in] border_mode Border mode to use. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + * + */ + virtual void configure(ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value = 0) = 0; + +protected: + ICLTensor *_input; + CLPyramid *_pyramid; + CLPyramid _tmp; +}; + +/** Basic function to execute gaussian pyramid with HALF scale factor. This function calls the following OpenCL kernels: + * + * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref CLGaussianPyramidHorKernel + * -# @ref CLGaussianPyramidVertKernel + */ +class CLGaussianPyramidHalf : public CLGaussianPyramid +{ +public: + /** Constructor */ + CLGaussianPyramidHalf(); + + // Inherited methods overridden: + void configure(ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override; + void run() override; + +private: + std::unique_ptr _border_handler; + std::unique_ptr _horizontal_reduction; + std::unique_ptr _vertical_reduction; +}; + +/** Basic function to execute gaussian pyramid with ORB scale factor. 
This function calls the following OpenCL kernels and functions: + * + * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref CLGaussian5x5 + * -# @ref CLScaleKernel + */ +class CLGaussianPyramidOrb : public CLGaussianPyramid +{ +public: + /** Constructor */ + CLGaussianPyramidOrb(); + + // Inherited methods overridden: + void configure(ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override; + void run() override; + +private: + std::unique_ptr _gauss5x5; + std::unique_ptr _scale_nearest; +}; +} +#endif /*__ARM_COMPUTE_CLGAUSSIANPYRAMID_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLHOGDescriptor.h b/arm_compute/runtime/CL/functions/CLHOGDescriptor.h new file mode 100644 index 0000000000..cdb23bff33 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLHOGDescriptor.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLHOGDESCRIPTOR_H__ +#define __ARM_COMPUTE_CLHOGDESCRIPTOR_H__ + +#include "arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/functions/CLHOGGradient.h" +#include "arm_compute/runtime/IFunction.h" + +namespace arm_compute +{ +class IHOG; +/** Basic function to calculate HOG descriptor. This function calls the following OpenCL kernels: + * + * -# @ref CLHOGGradient + * -# @ref CLHOGOrientationBinningKernel + * -# @ref CLHOGBlockNormalizationKernel + * + */ +class CLHOGDescriptor : public IFunction +{ +public: + /** Default constructor */ + CLHOGDescriptor(); + /** Initialise the function's source, destination, HOG data-object and border mode + * + * @param[in, out] input Input tensor. Data type supported: U8 + * (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Output tensor which stores the HOG descriptor. DataType supported: F32. The number of channels is equal to the number of histogram bins per block + * @param[in] hog HOG data object which describes the HOG descriptor + * @param[in] border_mode Border mode to use. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
+ */ + void configure(ICLTensor *input, ICLTensor *output, const IHOG *hog, BorderMode border_mode, uint8_t constant_border_value = 0); + + // Inherited method overridden: + void run() override; + +private: + CLHOGGradient _gradient; + CLHOGOrientationBinningKernel _orient_bin; + CLHOGBlockNormalizationKernel _block_norm; + CLTensor _mag; + CLTensor _phase; + CLTensor _hog_space; +}; +} + +#endif /* __ARM_COMPUTE_CLHOGDESCRIPTOR_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLHOGDetector.h b/arm_compute/runtime/CL/functions/CLHOGDetector.h new file mode 100644 index 0000000000..0b4fad7766 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLHOGDetector.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLHOGDETECTOR_H__ +#define __ARM_COMPUTE_CLHOGDETECTOR_H__ + +#include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/core/CL/kernels/CLHOGDetectorKernel.h" +#include "arm_compute/core/IHOG.h" +#include "arm_compute/runtime/IFunction.h" + +namespace arm_compute +{ +/** Basic function to execute HOG detector based on linear SVM. This function calls the following OpenCL kernel: + * + * -# @ref CLHOGDetectorKernel + * + */ +class CLHOGDetector : public IFunction +{ +public: + /** Default constructor */ + CLHOGDetector(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHOGDetector(const CLHOGDetector &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHOGDetector &operator=(const CLHOGDetector &) = delete; + /** Allow instances of this class to be moved */ + CLHOGDetector(CLHOGDetector &&) = default; + /** Allow instances of this class to be moved */ + CLHOGDetector &operator=(CLHOGDetector &&) = default; + /** Default destructor */ + ~CLHOGDetector() = default; + /** Initialise the kernel's input, output, HOG data object, detection window stride, threshold and index class + * + * @attention The function does not reset the number of values in @ref IDetectionWindowArray so it is caller's responsibility to clear it. + * + * @param[in] input Input tensor. It is the output of @ref NEHOGDescriptor. Data type supported: F32 + * @param[in] hog HOG data-object that describes the HOG descriptor + * @param[out] detection_windows Array of @ref DetectionWindow used to store the detected objects + * @param[in] detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions. 
+ * It must be multiple of the block stride stored in hog + * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane + * @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to + */ + void configure(const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, const Size2D &detection_window_stride, float threshold = 0.0f, size_t idx_class = 0); + + // Inherited methods overridden: + void run() override; + +private: + CLHOGDetectorKernel _hog_detector_kernel; + ICLDetectionWindowArray *_detection_windows; + cl::Buffer _num_detection_windows; +}; +} + +#endif /* __ARM_COMPUTE_CLHOGDETECTOR_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLHOGGradient.h b/arm_compute/runtime/CL/functions/CLHOGGradient.h new file mode 100644 index 0000000000..e74a68497f --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLHOGGradient.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLHOGGRADIENT_H__ +#define __ARM_COMPUTE_CLHOGGRADIENT_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +#include "arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/functions/CLDerivative.h" +#include "arm_compute/runtime/IFunction.h" + +#include + +namespace arm_compute +{ +/** Basic function to calculate the gradient for HOG. This function calls the following OpenCL kernels: + * + * -# @ref CLDerivative + * -# @ref CLMagnitudePhaseKernel + * + */ +class CLHOGGradient : public IFunction +{ +public: + /** Default constructor */ + CLHOGGradient(); + /** Initialise the function's source, destinations, phase type and border mode + * + * @param[in, out] input Input tensor. Data type supported: U8. + * (Written to only for @p border_mode != UNDEFINED) + * @param[out] output_magnitude Output tensor (magnitude). Data type supported: U16. + * @param[out] output_phase Output tensor.(phase). Format supported: U8 + * @param[in] phase_type Type of @ref PhaseType + * @param[in] border_mode Border mode to use + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
+ */ + void configure(ICLTensor *input, ICLTensor *output_magnitude, ICLTensor *output_phase, PhaseType phase_type, BorderMode border_mode, uint8_t constant_border_value = 0); + + // Inherited method overridden: + void run() override; + +private: + CLDerivative _derivative; + CLMagnitudePhaseKernel _mag_phase; + CLTensor _gx; + CLTensor _gy; +}; +} +#endif /*__ARM_COMPUTE_CLHOGGRADIENT_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLHOGMultiDetection.h b/arm_compute/runtime/CL/functions/CLHOGMultiDetection.h new file mode 100644 index 0000000000..3fe0fa932a --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLHOGMultiDetection.h @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLHOGMULTIDETECTION_H__ +#define __ARM_COMPUTE_CLHOGMULTIDETECTION_H__ + +#include "arm_compute/core/CL/ICLArray.h" +#include "arm_compute/core/CL/ICLMultiHOG.h" +#include "arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h" +#include "arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/functions/CLHOGDetector.h" +#include "arm_compute/runtime/CL/functions/CLHOGGradient.h" +#include "arm_compute/runtime/IFunction.h" + +namespace arm_compute +{ +/** Basic function to detect multiple objects (or the same object at different scales) on the same input image using HOG. This function calls the following kernels: + * + * -# @ref CLHOGGradient + * -# @ref CLHOGOrientationBinningKernel + * -# @ref CLHOGBlockNormalizationKernel + * -# @ref CLHOGDetector + * -# @ref CPPDetectionWindowNonMaximaSuppressionKernel (executed if non_maxima_suppression == true) + * + * @note This implementation works if all the HOG data-objects within the IMultiHOG container have the same: + * -# Phase type + -# Normalization type + -# L2 hysteresis threshold if the normalization type is L2HYS_NORM + * + */ +class CLHOGMultiDetection : public IFunction +{ +public: + /** Default constructor */ + CLHOGMultiDetection(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHOGMultiDetection(const CLHOGMultiDetection &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHOGMultiDetection &operator=(const CLHOGMultiDetection &) = delete; + /** Initialise the function's source, destination, detection window strides, border mode, threshold and non-maxima suppression + * + * @param[in, out] input Input tensor. Data type supported: U8 + * (Written to only for @p border_mode != UNDEFINED) + * @param[in] multi_hog Container of multiple HOG data object. 
Each HOG data object describes one HOG model to detect. + * This container should store the HOG data-objects in descending or ascending cell_size width order. + * This will help to understand if the HOG descriptor computation can be skipped for some HOG data-objects + * @param[out] detection_windows Array of @ref DetectionWindow used for locating the detected objects + * @param[in] detection_window_strides Array of @ref Size2D used to specify the distance in pixels between 2 consecutive detection windows in x and y directions for each HOG data-object + * The dimension of this array must be the same of multi_hog->num_models() + * The i-th detection_window_stride of this array must be multiple of the block_stride stored in the i-th multi_hog array + * @param[in] border_mode Border mode to use. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane + * @param[in] non_maxima_suppression (Optional) Flag to specify whether the non-maxima suppression is required or not. 
+ * True if the non-maxima suppression stage has to be computed + * @param[in] min_distance (Optional) Radial Euclidean distance to use for the non-maxima suppression stage + * + */ + void configure(ICLTensor *input, const ICLMultiHOG *multi_hog, ICLDetectionWindowArray *detection_windows, ICLSize2DArray *detection_window_strides, BorderMode border_mode, + uint8_t constant_border_value = 0, + float threshold = 0.0f, bool non_maxima_suppression = false, float min_distance = 1.0f); + + // Inherited method overridden: + void run() override; + +private: + CLHOGGradient _gradient_kernel; + std::unique_ptr _orient_bin_kernel; + std::unique_ptr _block_norm_kernel; + std::unique_ptr _hog_detect_kernel; + std::unique_ptr _non_maxima_kernel; + std::unique_ptr _hog_space; + std::unique_ptr _hog_norm_space; + ICLDetectionWindowArray *_detection_windows; + CLTensor _mag; + CLTensor _phase; + bool _non_maxima_suppression; + size_t _num_orient_bin_kernel; + size_t _num_block_norm_kernel; + size_t _num_hog_detect_kernel; +}; +} + +#endif /* __ARM_COMPUTE_CLHOGMULTIDETECTION_H__ */ \ No newline at end of file diff --git a/arm_compute/runtime/CL/functions/CLHarrisCorners.h b/arm_compute/runtime/CL/functions/CLHarrisCorners.h new file mode 100644 index 0000000000..90da687435 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLHarrisCorners.h @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLHARRISCORNERS_H__ +#define __ARM_COMPUTE_CLHARRISCORNERS_H__ + +#include "arm_compute/runtime/IFunction.h" + +#include "arm_compute/core/CL/ICLArray.h" +#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" +#include "arm_compute/core/CL/kernels/CLHarrisCornersKernel.h" +#include "arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h" +#include "arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLTensor.h" + +#include + +#include + +namespace arm_compute +{ +class ICLTensor; +using ICLImage = ICLTensor; + +/** Basic function to execute harris corners detection. 
This function calls the following CL and NEON kernels and functions: + * + * @note Requires CPU support for the kernels: CPPCornerCandidatesKernel and CPPSortEuclideanDistanceKernel. + * + * -# @ref CLSobel3x3 (if gradient_size == 3) or<br/>
+ * @ref CLSobel5x5 (if gradient_size == 5) or<br/>
+ * @ref CLSobel7x7 (if gradient_size == 7) + * -# @ref CLFillBorderKernel + * -# @ref CLHarrisScoreKernel + * -# @ref CLNonMaximaSuppression3x3 + * -# @ref CPPCornerCandidatesKernel + * -# @ref CPPSortEuclideanDistanceKernel + */ +class CLHarrisCorners : public IFunction +{ +public: + /** Constructor */ + CLHarrisCorners(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHarrisCorners(const CLHarrisCorners &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + const CLHarrisCorners &operator=(const CLHarrisCorners &) = delete; + /** Initialize the function's source, destination, conv and border_mode. + * + * @param[in,out] input Source image. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[in] threshold Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel). + * @param[in] min_dist Radial Euclidean distance for the euclidean distance stage. + * @param[in] sensitivity Sensitivity threshold k from the Harris-Stephens equation + * @param[in] gradient_size The gradient window size to use on the input. The implementation supports 3, 5, and 7 + * @param[in] block_size The block window size used to compute the Harris Corner score. The implementation supports 3, 5, and 7. + * @param[out] corners Array of keypoints to store the results. + * @param[in] border_mode Border mode to use + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
+ */ + void configure(ICLImage *input, float threshold, float min_dist, float sensitivity, + int32_t gradient_size, int32_t block_size, ICLKeyPointArray *corners, + BorderMode border_mode, uint8_t constant_border_value = 0); + + // Inherited methods overridden: + void run() override; + +private: + std::unique_ptr _sobel; /**< Sobel function */ + CLHarrisScoreKernel _harris_score; /**< Harris score kernel */ + CLNonMaximaSuppression3x3Kernel _non_max_suppr; /**< Non-maxima suppression function */ + CPPCornerCandidatesKernel _candidates; /**< Sort kernel */ + CPPSortEuclideanDistanceKernel _sort_euclidean; /**< Euclidean distance kernel */ + CLFillBorderKernel _border_gx; /**< Border handler before running harris score */ + CLFillBorderKernel _border_gy; /**< Border handler before running harris score */ + CLImage _gx; /**< Source image - Gx component */ + CLImage _gy; /**< Source image - Gy component */ + CLImage _score; /**< Source image - Harris score */ + CLImage _nonmax; /**< Source image - Non-Maxima suppressed image */ + std::unique_ptr _corners_list; /**< Array of InternalKeypoint. It stores the potential corner candidates */ + int32_t _num_corner_candidates; /**< Number of potential corner candidates */ + ICLKeyPointArray *_corners; /**< Output corners array */ +}; +} +#endif /*__ARM_COMPUTE_CLHARRISCORNERS_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLHistogram.h b/arm_compute/runtime/CL/functions/CLHistogram.h new file mode 100644 index 0000000000..455b61812d --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLHistogram.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLHISTOGRAM_H__ +#define __ARM_COMPUTE_CLHISTOGRAM_H__ +
+#include "arm_compute/core/CL/kernels/CLHistogramKernel.h" +#include "arm_compute/runtime/IFunction.h" + +namespace arm_compute +{ +class ICLDistribution1D; +class ICLTensor; +using ICLImage = ICLTensor; + +/** Basic function to execute histogram. 
This function calls the following OpenCL kernels: + * + * -# @ref CLHistogramKernel + * -# @ref CLHistogramBorderKernel + * + */ +class CLHistogram : public IFunction +{ +public: + /** + * Default constructor + */ + CLHistogram(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLHistogram(const CLHistogram &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + const CLHistogram &operator=(const CLHistogram &) = delete; + /** Initialize the function + * + * @param[in] input Source image. Data types supported: U8 + * @param[out] output Output distribution. + */ + void configure(const ICLImage *input, ICLDistribution1D *output); + + // Inherited methods overridden: + void run() override; + +private: + CLHistogramKernel _kernel; /**< kernel to run */ + CLHistogramBorderKernel _kernel_border; /**< Border kernel to run */ +}; +} +#endif /*__ARM_COMPUTE_CLHISTOGRAM_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLIntegralImage.h b/arm_compute/runtime/CL/functions/CLIntegralImage.h new file mode 100644 index 0000000000..25fc549b29 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLIntegralImage.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLINTEGRALIMAGE_H__ +#define __ARM_COMPUTE_CLINTEGRALIMAGE_H__ + +#include "arm_compute/core/CL/kernels/CLIntegralImageKernel.h" +#include "arm_compute/runtime/IFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to execute integral image. This function calls the following OpenCL kernels: + * + * -# @ref CLIntegralImageHorKernel + * -# @ref CLIntegralImageVertKernel + * + */ +class CLIntegralImage : public IFunction +{ +public: + /** Default Constructor. */ + CLIntegralImage(); + /** Initialise the function's source, destinations and border mode. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] output Destination tensor, Data types supported: U32. + */ + void configure(const ICLTensor *input, ICLTensor *output); + + // Inherited methods overridden: + void run() override; + +protected: + CLIntegralImageHorKernel _integral_hor; /**< Integral Image Horizontal kernel */ + CLIntegralImageVertKernel _integral_vert; /**< Integral Image Vertical kernel */ +}; +} +#endif /*__ARM_COMPUTE_CLINTEGRALIMAGE_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLLaplacianPyramid.h b/arm_compute/runtime/CL/functions/CLLaplacianPyramid.h new file mode 100644 index 0000000000..0c6708aa73 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLLaplacianPyramid.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLLAPLACIANPYRAMID_H__ +#define __ARM_COMPUTE_CLLAPLACIANPYRAMID_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLPyramid.h" +#include "arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h" +#include "arm_compute/runtime/CL/functions/CLDepthConvert.h" +#include "arm_compute/runtime/CL/functions/CLGaussian5x5.h" +#include "arm_compute/runtime/CL/functions/CLGaussianPyramid.h" +#include "arm_compute/runtime/IFunction.h" + +#include <cstddef> +#include <cstdint> +#include <memory> + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to execute laplacian pyramid. This function calls the following OpenCL kernels and functions: + * + * -# @ref CLGaussianPyramidHalf + * -# @ref CLGaussian5x5 + * -# @ref CLArithmeticSubtraction + * + * First a Gaussian pyramid is created. 
Then, for each level i, the corresponding tensor I(i) is blurred with the Gaussian 5x5 filter, and then + * difference between the two tensors is the corresponding level L(i) of the Laplacian pyramid. + * L(i) = I(i) - Gaussian5x5(I(i)) + * Level 0 has always the same first two dimensions as the input tensor. +*/ +class CLLaplacianPyramid : public IFunction +{ +public: + /** Constructor */ + CLLaplacianPyramid(); + /** Initialise the function's source, destinations and border mode. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[out] pyramid Destination pyramid tensors, Data types supported at each level: S16. + * @param[out] output The lowest resolution tensor necessary to reconstruct the input tensor from the pyramid. Data types supported: S16. + * The first two dimensions of this tensor must match the first two dimensions of the tensor in the last level of the pyramid, that is: + * output.width = input.width() / pow(2,pyramid_levels-1) and out.height = in.height() / pow(2,pyramid_levels-1) + * @param[in] border_mode Border mode to use. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + * + */ + void configure(ICLTensor *input, CLPyramid *pyramid, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value); + + // Inherited methods overridden: + void run() override; + +private: + size_t _num_levels; + CLGaussianPyramidHalf _gaussian_pyr_function; + std::unique_ptr<CLGaussian5x5[]> _convf; + std::unique_ptr<CLArithmeticSubtraction[]> _subf; + CLDepthConvert _depth_function; + CLPyramid _gauss_pyr; + CLPyramid _conv_pyr; +}; +} +#endif /*__ARM_COMPUTE_CLLAPLACIANPYRAMID_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLLaplacianReconstruct.h b/arm_compute/runtime/CL/functions/CLLaplacianReconstruct.h new file mode 100644 index 0000000000..4bc7eb65ce --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLLaplacianReconstruct.h @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLLAPLACIANRECONSTRUCT_H__ +#define __ARM_COMPUTE_CLLAPLACIANRECONSTRUCT_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLPyramid.h" +#include "arm_compute/runtime/CL/functions/CLArithmeticAddition.h" +#include "arm_compute/runtime/CL/functions/CLDepthConvert.h" +#include "arm_compute/runtime/CL/functions/CLScale.h" +#include "arm_compute/runtime/IFunction.h" + +#include <cstddef> +#include <memory> + +namespace arm_compute +{ +class ICLTensor; +using ICLImage = ICLTensor; + +/** Basic function to execute laplacian reconstruction. This function calls the following OpenCL kernels and functions: + * + * -# @ref CLArithmeticAddition + * -# @ref CLScale + * -# @ref CLDepthConvert + * + * This function reconstructs the original image from a Laplacian Image Pyramid. 
+ * + * The input image is added to the last level of the Laplacian pyramid L(n-2), the resulting image is upsampled to the + * resolution of the next pyramid level. + * + * I(n-2) = upsample( input + L(n-1) ) + * + * For each pyramid level i, except i=0 and i=n-1: + * I(i-1) = upsample(I(i) + L(i)) + * + * output = I(0) + L(0) +*/ +class CLLaplacianReconstruct : public IFunction +{ +public: + /** Constructor */ + CLLaplacianReconstruct(); + /** Initialise the function's source, destinations and border mode. + * + * The Output image must have the same size as the first level of the pyramid. + * The Input image must have the same size as the last level of the pyramid. + * + * The idea is to reconstuct the original hi-res image from a low-res representation of it and the laplacian pyramid. + * + * @param[in] pyramid Laplacian pyramid tensors, Data types supported at each level: S16. + * @param[in] input Source tensor. Data types supported: S16. + * @param[out] output Output tensor. Data types supported: U8. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + * + */ + void configure(const CLPyramid *pyramid, const ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value); + + // Inherited methods overridden: + void run() override; + +private: + CLPyramid _tmp_pyr; + std::unique_ptr<CLArithmeticAddition[]> _addf; + std::unique_ptr<CLScale[]> _scalef; + CLDepthConvert _depthf; +}; +} +#endif /*__ARM_COMPUTE_CLLAPLACIANRECONSTRUCT_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h new file mode 100644 index 0000000000..b4e469196e --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLLOCALLYCONNECTEDLAYER_H__ +#define __ARM_COMPUTE_CLLOCALLYCONNECTEDLAYER_H__ + +#include "arm_compute/runtime/IFunction.h" + +#include "arm_compute/core/CL/kernels/CLCol2ImKernel.h" +#include "arm_compute/core/CL/kernels/CLIm2ColKernel.h" +#include "arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h" +#include "arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLTensor.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to compute the locally connected layer. 
This function calls the following OpenCL kernels: + * + * -# @ref CLLocallyConnectedLayerWeightsReshapeKernel (executed only once for each configuration) + * -# @ref CLIm2ColKernel + * -# @ref CLLocallyConnectedMatrixMultiplyKernel + * -# @ref CLCol2ImKernel + */ +class CLLocallyConnectedLayer : public IFunction +{ +public: + /** Default constructor */ + CLLocallyConnectedLayer(); + /** Set the input and output tensors. + * + * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. + * Data types supported: F32. + * @param[in] weights Weights tensor. Weights are 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches]. Data type supported:Same as @p input. + * @param[in] biases Biases tensor. Shared biases supported. Biases are 2D tensor with dimensions [OFM, num_patches]. Data type supported:Same as @p input. + * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. + * Data types supported: Same as @p input. + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. 
+ */ + void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info); + + // Inherited methods overridden: + void run() override; + +private: + CLIm2ColKernel _input_im2col_kernel; + CLLocallyConnectedLayerWeightsReshapeKernel _weights_reshape_kernel; + CLLocallyConnectedMatrixMultiplyKernel _mm_kernel; + CLCol2ImKernel _output_col2im_kernel; + CLTensor _input_im2col_reshaped; + CLTensor _weights_reshaped; + CLTensor _gemm_output; + bool _is_first_run; +}; +} +#endif /* __ARM_COMPUTE_CLLOCALLYCONNECTEDLAYER_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLMagnitude.h b/arm_compute/runtime/CL/functions/CLMagnitude.h new file mode 100644 index 0000000000..dc5f9139b3 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLMagnitude.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLMAGNITUDE_H__ +#define __ARM_COMPUTE_CLMAGNITUDE_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLMagnitudePhaseKernel. */ +class CLMagnitude : public ICLSimpleFunction +{ +public: + /** Initialise the kernel's inputs. + * + * @param[in] input1 First tensor input. Data types supported: S16. + * @param[in] input2 Second tensor input. Data types supported: S16. + * @param[out] output Output tensor. Data types supported: S16. + * @param[in] mag_type (Optional) Magnitude calculation type. Default: L2NORM. + */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, MagnitudeType mag_type = MagnitudeType::L2NORM); +}; +} +#endif /*__ARM_COMPUTE_CLMAGNITUDE_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLMeanStdDev.h b/arm_compute/runtime/CL/functions/CLMeanStdDev.h new file mode 100644 index 0000000000..e33bcdd779 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLMeanStdDev.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLMEANSTDDEV_H__ +#define __ARM_COMPUTE_CLMEANSTDDEV_H__ + +#include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/core/CL/kernels/CLMeanStdDevKernel.h" +#include "arm_compute/runtime/IFunction.h" + +namespace arm_compute +{ +/** Basic function to execute mean and standard deviation by calling @ref CLMeanStdDevKernel */ +class CLMeanStdDev : public IFunction +{ +public: + /** Default Constructor. */ + CLMeanStdDev(); + /** Initialise the kernel's inputs and outputs. + * + * @param[in] input Input image. Data types supported: U8. + * @param[out] mean Output average pixel value. + * @param[out] stddev (Optional)Output standard deviation of pixel values. 
+ */ + void configure(const ICLImage *input, float *mean, float *stddev = nullptr); + + // Inherited methods overridden: + void run() override; + +private: + CLMeanStdDevKernel _mean_stddev_kernel; /**< Kernel that standard deviation calculation. */ + cl::Buffer _global_sum; /**< Variable that holds the global sum among calls in order to ease reduction */ + cl::Buffer _global_sum_squared; /**< Variable that holds the global sum of squared values among calls in order to ease reduction */ +}; +} +#endif /*__ARM_COMPUTE_CLMEANSTDDEV_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLMedian3x3.h b/arm_compute/runtime/CL/functions/CLMedian3x3.h new file mode 100644 index 0000000000..af84ba7289 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLMedian3x3.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLMEDIAN3X3_H__ +#define __ARM_COMPUTE_CLMEDIAN3X3_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to execute median filter. This function calls the following OpenCL kernels: + * + * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref CLMedian3x3Kernel + * + */ +class CLMedian3x3 : public ICLSimpleFunction +{ +public: + /** Initialise the function's source, destinations and border mode. + * + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor, Data types supported: U8. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0); +}; +} +#endif /*__ARM_COMPUTE_CLMEDIAN3X3_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLMinMaxLocation.h b/arm_compute/runtime/CL/functions/CLMinMaxLocation.h new file mode 100644 index 0000000000..84fd67515b --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLMinMaxLocation.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLMINMAXLOCATION_H__ +#define __ARM_COMPUTE_CLMINMAXLOCATION_H__ + +#include "arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h" +#include "arm_compute/runtime/CL/CLArray.h" +#include "arm_compute/runtime/IFunction.h" + +namespace arm_compute +{ +class ICLTensor; +using ICLImage = ICLTensor; + +/** Basic function to execute min and max location. 
This function calls the following OpenCL kernels: + * + * -# @ref CLMinMaxKernel + * -# @ref CLMinMaxLocationKernel + */ +class CLMinMaxLocation : public IFunction +{ +public: + /** Constructor */ + CLMinMaxLocation(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLMinMaxLocation(const CLMinMaxLocation &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLMinMaxLocation &operator=(const CLMinMaxLocation &) = delete; + /** Allow instances of this class to be moved */ + CLMinMaxLocation(CLMinMaxLocation &&) = default; + /** Allow instances of this class to be moved */ + CLMinMaxLocation &operator=(CLMinMaxLocation &&) = default; + /** Initialise the kernel's inputs and outputs. + * + * @note When locations of min and max occurrences are requested, the reported number of locations is limited to the given array size. + * + * @param[in] input Input image. Data types supported: U8 or S16. + * @param[out] min Minimum value of image. + * @param[out] max Maximum value of image. + * @param[out] min_loc (Optional) Array of Coordinates2D used to store minimum value locations. + * @param[out] max_loc (Optional) Array of Coordinates2D used to store maximum value locations. + * @param[out] min_count (Optional) Number of minimum value encounters. + * @param[out] max_count (Optional) Number of maximum value encounters. 
+ */ + void configure(const ICLImage *input, int32_t *min, int32_t *max, + CLCoordinates2DArray *min_loc = nullptr, CLCoordinates2DArray *max_loc = nullptr, + uint32_t *min_count = nullptr, uint32_t *max_count = nullptr); + + // Inherited methods overridden: + void run() override; + +private: + CLMinMaxKernel _min_max_kernel; /**< Kernel that performs min/max */ + CLMinMaxLocationKernel _min_max_loc_kernel; /**< Kernel that counts min/max occurrences and identifies their positions */ + cl::Buffer _min_max_vals; /**< Buffer to collect min, max values */ + cl::Buffer _min_max_count_vals; /**< Buffer to collect min, max values */ + int32_t *_min; /**< Minimum value. */ + int32_t *_max; /**< Maximum value. */ + uint32_t *_min_count; /**< Minimum value occurrences. */ + uint32_t *_max_count; /**< Maximum value occurrences. */ + CLCoordinates2DArray *_min_loc; /**< Minimum value occurrences coordinates. */ + CLCoordinates2DArray *_max_loc; /**< Maximum value occurrences coordinates. */ +}; +} +#endif /*__ARM_COMPUTE_CLMINMAXLOCATION_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLNonLinearFilter.h b/arm_compute/runtime/CL/functions/CLNonLinearFilter.h new file mode 100644 index 0000000000..9eee33e0ba --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLNonLinearFilter.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLNONLINEARFILTER_H__ +#define __ARM_COMPUTE_CLNONLINEARFILTER_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to execute non linear filter. This function calls the following OpenCL kernels: + * + * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref CLNonLinearFilterKernel + * + * @note Supported mask dimensions squares of sizes 3, 5 + */ +class CLNonLinearFilter : public ICLSimpleFunction +{ +public: + /** Initialize the function's source, destination, conv and border_mode. + * + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor. Data types supported: U8 + * @param[in] function Non linear function to perform + * @param[in] mask_size Mask size. Supported sizes: 3, 5 + * @param[in] pattern Mask pattern + * @param[in] mask The given mask. Will be used only if pattern is specified to PATTERN_OTHER + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
+ */ + void configure(ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, + BorderMode border_mode, uint8_t constant_border_value = 0); +}; +} +#endif /*__ARM_COMPUTE_CLNONLINEARFILTER_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h b/arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h new file mode 100644 index 0000000000..7adced4313 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLNONMAXIMASUPPRESSION3X3_H__ +#define __ARM_COMPUTE_CLNONMAXIMASUPPRESSION3X3_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to execute non-maxima suppression over a 3x3 window. This function calls the following CL kernels: + * + * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref CLNonMaximaSuppression3x3Kernel + */ +class CLNonMaximaSuppression3x3 : public ICLSimpleFunction +{ +public: + /** Initialise the function's source, destinations and border mode. + * + * @note The implementation supports just 2 border modes: UNDEFINED and CONSTANT + * The constant values used with CONSTANT border mode is 0 + * + * @param[in,out] input Source tensor. Data types supported: U8, F32. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination for the Non-Maxima suppressions 3x3. Data types supported: same as @p input. + * @param[in] border_mode Border mode to use for non-maxima suppression. + * The implementation supports just 2 border modes: UNDEFINED and CONSTANT + */ + void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode); +}; +} +#endif /* __ARM_COMPUTE_CLNONMAXIMASUPPRESSION3X3_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLNormalizationLayer.h new file mode 100644 index 0000000000..a4dae85c1d --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLNormalizationLayer.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLNORMALIZATIONLAYER_H__ +#define __ARM_COMPUTE_CLNORMALIZATIONLAYER_H__ + +#include "arm_compute/runtime/IFunction.h" + +#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" +#include "arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h" +#include "arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h" +#include "arm_compute/runtime/CL/CLTensor.h" + +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to simulate a normalization layer. This function calls the following CL kernels: + * + * -# @ref CLPixelWiseMultiplicationKernel + * -# @ref CLFillBorderKernel + * -# @ref CLNormalizationLayerKernel + * + */ +class CLNormalizationLayer : public IFunction +{ +public: + /** Default constructor */ + CLNormalizationLayer(); + /** Set the input and output tensors. 
+ * + * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], + * and an optional 4th dimension for batch of inputs. Data types supported: F16, F32. Number of channels must be 1. + * @param[out] output Destination tensor. Dimensions, data type and number of channels must match the input ones. + * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters. + */ + void configure(const ICLTensor *input, ICLTensor *output, NormalizationLayerInfo norm_info); + + // Inherited methods overridden: + void run() override; + +private: + CLTensor _squared_input; /**< The intermediate buffer which stores results of squaring input*/ + CLNormalizationLayerKernel _norm_kernel; /**< Normalization layer kernel to run */ + CLPixelWiseMultiplicationKernel _multiply_kernel; /**< Pixel multiplication kernel to run */ + CLFillBorderKernel _border_handler; /**< Kernel to handle borders */ +}; +} +#endif /* __ARM_COMPUTE_CLNORMALIZATIONLAYER_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLOpticalFlow.h b/arm_compute/runtime/CL/functions/CLOpticalFlow.h new file mode 100644 index 0000000000..ca3f86100e --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLOpticalFlow.h @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLOPTICALFLOW_H__ +#define __ARM_COMPUTE_CLOPTICALFLOW_H__ + +#include "arm_compute/core/CL/kernels/CLLKTrackerKernel.h" + +#include "arm_compute/core/IArray.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLArray.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/functions/CLScharr3x3.h" +#include "arm_compute/runtime/IFunction.h" + +#include +#include +#include + +namespace arm_compute +{ +class CLPyramid; + +using CLLKInternalKeypointArray = CLArray; +using CLCoefficientTableArray = CLArray; +using CLOldValueArray = CLArray; + +/** Basic function to execute optical flow. 
This function calls the following OpenCL kernels and functions: + * + * -# @ref CLScharr3x3 + * -# @ref CLLKTrackerInitKernel + * -# @ref CLLKTrackerStage0Kernel + * -# @ref CLLKTrackerStage1Kernel + * -# @ref CLLKTrackerFinalizeKernel + */ +class CLOpticalFlow : public IFunction +{ +public: + /** Default constructor */ + CLOpticalFlow(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLOpticalFlow(const CLOpticalFlow &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLOpticalFlow &operator=(const CLOpticalFlow &) = delete; + /** Allow instances of this class to be moved */ + CLOpticalFlow(CLOpticalFlow &&) = default; + /** Allow instances of this class to be moved */ + CLOpticalFlow &operator=(CLOpticalFlow &&) = default; + /** Initialise the function input and output + * + * @param[in] old_pyramid Pointer to the pyramid for the old tensor. Data types supported U8 + * @param[in] new_pyramid Pointer to the pyramid for the new tensor. Data types supported U8 + * @param[in] old_points Pointer to the IKeyPointArray storing old key points + * @param[in] new_points_estimates Pointer to the IKeyPointArray storing new estimates key points + * @param[out] new_points Pointer to the IKeyPointArray storing new key points + * @param[in] termination The criteria to terminate the search of each keypoint. 
+ * @param[in] epsilon The error for terminating the algorithm + * @param[in] num_iterations The maximum number of iterations before terminate the alogrithm + * @param[in] window_dimension The size of the window on which to perform the algorithm + * @param[in] use_initial_estimate The flag to indicate whether the initial estimated position should be used + * @param[in] border_mode The border mode applied at scharr kernel stage + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT + * + */ + void configure(const CLPyramid *old_pyramid, const CLPyramid *new_pyramid, + const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates, ICLKeyPointArray *new_points, + Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, bool use_initial_estimate, + BorderMode border_mode, uint8_t constant_border_value = 0); + + // Inherited methods overridden: + void run() override; + +private: + std::unique_ptr _tracker_init_kernel; + std::unique_ptr _tracker_stage0_kernel; + std::unique_ptr _tracker_stage1_kernel; + CLLKTrackerFinalizeKernel _tracker_finalize_kernel; + std::unique_ptr _func_scharr; + std::unique_ptr _scharr_gx; + std::unique_ptr _scharr_gy; + const ICLKeyPointArray *_old_points; + const ICLKeyPointArray *_new_points_estimates; + ICLKeyPointArray *_new_points; + std::unique_ptr _old_points_internal; + std::unique_ptr _new_points_internal; + std::unique_ptr _coefficient_table; + std::unique_ptr _old_values; + size_t _num_levels; +}; +} +#endif /*__ARM_COMPUTE_CLOPTICALFLOW_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLPhase.h b/arm_compute/runtime/CL/functions/CLPhase.h new file mode 100644 index 0000000000..7cdfab16e2 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLPhase.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLPHASE_H__ +#define __ARM_COMPUTE_CLPHASE_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to execute an @ref CLMagnitudePhaseKernel. */ +class CLPhase : public ICLSimpleFunction +{ +public: + /** Initialise the kernel's inputs, output. + * + * @param[in] input1 First tensor input. Data types supported: S16. + * @param[in] input2 Second tensor input. Data types supported: S16. + * @param[out] output Output tensor. Data types supported: U8. + * @param[in] phase_type (Optional) Phase calculation type. Default: SIGNED. 
+ */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, PhaseType phase_type = PhaseType::SIGNED); +}; +} +#endif /*__ARM_COMPUTE_CLPHASE_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h b/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h new file mode 100644 index 0000000000..71754fc3f4 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLPIXELWISEMULTIPLICATION_H__ +#define __ARM_COMPUTE_CLPIXELWISEMULTIPLICATION_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLPixelWiseMultiplicationKernel. 
*/ +class CLPixelWiseMultiplication : public ICLSimpleFunction +{ +public: + /** Initialise the kernel's inputs, output and convertion policy. + * + * @param[in] input1 First tensor input. Data types supported: U8, S16, F16 or F32. + * @param[in] input2 Second tensor input. Data types supported: U8, S16, F16 or F32. + * @param[out] output Output tensor. Data types supported: U8(Only if both inputs are U8), S16, F16 or F32. + * @param[in] scale Scale to apply after multiplication. Must be positive. + * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate + * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even. + */ + void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float scale, + ConvertPolicy overflow_policy, RoundingPolicy rounding_policy); +}; +} +#endif /*__ARM_COMPUTE_CLPIXELWISEMULTIPLICATION_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLPoolingLayer.h b/arm_compute/runtime/CL/functions/CLPoolingLayer.h new file mode 100644 index 0000000000..f92860e5b2 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLPoolingLayer.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLPOOLINGLAYER_H__ +#define __ARM_COMPUTE_CLPOOLINGLAYER_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to simulate a pooling layer with the specified pooling operation. This function calls the following OpenCL kernels: + * + * -# @ref CLFillBorderKernel (executed if padding size is different from zero) + * -# @ref CLPoolingLayerKernel + */ +class CLPoolingLayer : public ICLSimpleFunction +{ +public: + /** Set the input and output tensors. + * + * @param[in,out] input Source tensor. (Written to only when padding != 0) Data types supported: F16, F32. + * @param[out] output Destination tensor. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. + */ + void configure(ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info); +}; +} +#endif /* __ARM_COMPUTE_CLPOOLINGLAYER_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLRemap.h b/arm_compute/runtime/CL/functions/CLRemap.h new file mode 100644 index 0000000000..4cb2be90e7 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLRemap.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLREMAP_H__ +#define __ARM_COMPUTE_CLREMAP_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to execute remap. This function calls the following OpenCL kernels: + * + * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref CLRemapKernel + */ +class CLRemap : public ICLSimpleFunction +{ +public: + /** Initialise the function's sources, destination, interpolation policy and border mode. + * + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[in] map_x Map for X coords. Data types supported: F32. + * @param[in] map_y Map for Y coords. Data types supported: F32. 
+ * @param[out] output Output tensor. Data types supported: U8. + * @param[in] policy Interpolation policy to use. Only NEAREST and BILINEAR are supported. + * @param[in] border_mode Border mode to use on the input tensor. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + * + */ + void configure(ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, + InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0); +}; +} +#endif /*__ARM_COMPUTE_CLREMAP_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLScale.h b/arm_compute/runtime/CL/functions/CLScale.h new file mode 100644 index 0000000000..c2438ddf9b --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLScale.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLSCALE_H__ +#define __ARM_COMPUTE_CLSCALE_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLScaleKernel */ +class CLScale : public ICLSimpleFunction +{ +public: + /** Initialize the function's source, destination, interpolation type and border_mode. + * + * @param[in,out] input Source tensor. Data types supported: U8, S16. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor. Data types supported: U8, S16 (Must be the same as the input tensor). + * All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. + * @param[in] policy The interpolation type. + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ICLTensor *input, ICLTensor *output, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0); +}; +} +#endif /*__ARM_COMPUTE_CLSCALE_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLScharr3x3.h b/arm_compute/runtime/CL/functions/CLScharr3x3.h new file mode 100644 index 0000000000..3ea0b84624 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLScharr3x3.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLSCHARR3X3_H__ +#define __ARM_COMPUTE_CLSCHARR3X3_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to execute scharr 3x3 filter. This function calls the following OpenCL kernels: + * + * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref CLScharr3x3Kernel + * + */ +class CLScharr3x3 : public ICLSimpleFunction +{ +public: + /** Initialise the function's source, destinations and border mode. + * + * @note At least one of output_x or output_y must be not NULL. + * + * @param[in,out] input Source tensor. Data types supported: U8. 
(Written to only for @p border_mode != UNDEFINED) + * @param[out] output_x (optional) Destination for the Scharr 3x3 convolution along the X axis. Data types supported: S16. + * @param[out] output_y (optional) Destination for the Scharr 3x3 convolution along the Y axis. Data types supported: S16. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0); +}; +} +#endif /*__ARM_COMPUTE_CLSCHARR3X3_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLSobel3x3.h b/arm_compute/runtime/CL/functions/CLSobel3x3.h new file mode 100644 index 0000000000..7a4f47d0ed --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLSobel3x3.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLSOBEL3X3_H__ +#define __ARM_COMPUTE_CLSOBEL3X3_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to execute sobel 3x3 filter. This function calls the following OpenCL kernels: + * + * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref CLSobel3x3Kernel + * + */ +class CLSobel3x3 : public ICLSimpleFunction +{ +public: + /** Initialise the function's source, destinations and border mode. + * + * @note At least one of output_x or output_y must be not NULL. + * + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output_x (optional) Destination for the Sobel 3x3 convolution along the X axis. Data types supported: S16. + * @param[out] output_y (optional) Destination for the Sobel 3x3 convolution along the Y axis. Data types supported: S16. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0); +}; +} +#endif /*__ARM_COMPUTE_CLSOBEL3X3_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLSobel5x5.h b/arm_compute/runtime/CL/functions/CLSobel5x5.h new file mode 100644 index 0000000000..ad1f72faf8 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLSobel5x5.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLSOBEL5X5_H__ +#define __ARM_COMPUTE_CLSOBEL5X5_H__ + +#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" +#include "arm_compute/core/CL/kernels/CLSobel5x5Kernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/IFunction.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to execute sobel 5x5 filter. This function calls the following OpenCL kernels: + * + * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref CLSobel5x5HorKernel + * -# @ref CLSobel5x5VertKernel + * + */ +class CLSobel5x5 : public IFunction +{ +public: + /** Default Constructor. */ + CLSobel5x5(); + /** Initialise the function's source, destinations and border mode. 
+ * + * @note At least one of output_x or output_y must be not NULL. + * + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output_x (optional) Destination for the Sobel 5x5 convolution along the X axis. Data types supported: S16. + * @param[out] output_y (optional) Destination for the Sobel 5x5 convolution along the Y axis. Data types supported: S16. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0); + + // Inherited methods overridden: + void run() override; + +protected: + CLSobel5x5HorKernel _sobel_hor; /**< Sobel Horizontal 5x5 kernel */ + CLSobel5x5VertKernel _sobel_vert; /**< Sobel Vertical 5x5 kernel */ + CLFillBorderKernel _border_handler; /**< Kernel to handle image borders */ + CLImage _tmp_x; /**< Temporary buffer for Sobel X */ + CLImage _tmp_y; /**< Temporary buffer for Sobel Y */ +}; +} +#endif /*__ARM_COMPUTE_CLSOBEL5X5_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLSobel7x7.h b/arm_compute/runtime/CL/functions/CLSobel7x7.h new file mode 100644 index 0000000000..1a3fe1a50a --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLSobel7x7.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLSOBEL7X7_H__ +#define __ARM_COMPUTE_CLSOBEL7X7_H__ + +#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" +#include "arm_compute/core/CL/kernels/CLSobel7x7Kernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/IFunction.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to execute sobel 7x7 filter. This function calls the following OpenCL kernels: + * + * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref CLSobel7x7HorKernel + * -# @ref CLSobel7x7VertKernel + * + */ +class CLSobel7x7 : public IFunction +{ +public: + /** Default Constructor. */ + CLSobel7x7(); + /** Initialise the function's source, destinations and border mode. 
+ * + * @note At least one of output_x or output_y must be not NULL. + * + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output_x (optional) Destination for the Sobel 7x7 convolution along the X axis. Data types supported: S32. + * @param[out] output_y (optional) Destination for the Sobel 7x7 convolution along the Y axis. Data types supported: S32. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0); + + // Inherited methods overridden: + void run() override; + +protected: + CLSobel7x7HorKernel _sobel_hor; /**< Sobel Horizontal 7x7 kernel */ + CLSobel7x7VertKernel _sobel_vert; /**< Sobel Vertical 7x7 kernel */ + CLFillBorderKernel _border_handler; /**< Kernel to handle image borders */ + CLImage _tmp_x; /**< Temporary buffer for Sobel X */ + CLImage _tmp_y; /**< Temporary buffer for Sobel Y */ +}; +} +#endif /*__ARM_COMPUTE_CLSOBEL7X7_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h new file mode 100644 index 0000000000..42cfc06fc4 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLSOFTMAXLAYER_H__ +#define __ARM_COMPUTE_CLSOFTMAXLAYER_H__ + +#include "arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/IFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to compute a SoftmaxLayer. + * + * Softmax is calculated by : + * @f[ out = exp(x - max(x)) / sum(exp(x - max(x))) @f] + * + * This function runs the following kernels: + * -# @ref CLLogits1DMaxKernel + * -# @ref CLLogits1DShiftExpSumKernel + * -# @ref CLLogits1DNormKernel + */ +class CLSoftmaxLayer : public IFunction +{ +public: + /** Constructor */ + CLSoftmaxLayer(); + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: F16, F32. Number of channels must be 1. + * @param[out] output Destination tensor. 
Matching input type and channel number. + */ + void configure(const ICLTensor *input, ICLTensor *output); + + // Inherited methods overridden: + void run() override; + +private: + CLLogits1DMaxKernel _max_kernel; + CLLogits1DShiftExpSumKernel _shift_exp_sum_kernel; + CLLogits1DNormKernel _norm_kernel; + CLTensor _max; + CLTensor _sum; + CLTensor _tmp; +}; +} +#endif /* __ARM_COMPUTE_CLSOFTMAXLAYER_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLTableLookup.h b/arm_compute/runtime/CL/functions/CLTableLookup.h new file mode 100644 index 0000000000..ebe6593b6a --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLTableLookup.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLTABLELOOKUP_H__ +#define __ARM_COMPUTE_CLTABLELOOKUP_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLTensor; +class ICLLut; + +/** Basic function to run @ref CLTableLookupKernel */ +class CLTableLookup : public ICLSimpleFunction +{ +public: + /** Initialise the kernel's inputs and output + * + * @param[in] input First tensor input. Data types supported: U8 and S16 + * @param[in] lut Input lookup table. Data types supported: U8 and S16 + * @param[out] output Output tensor. Data types supported: U8 and S16 + */ + void configure(const ICLTensor *input, const ICLLut *lut, ICLTensor *output); +}; +} +#endif /*__ARM_COMPUTE_CLTABLELOOKUP_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLThreshold.h b/arm_compute/runtime/CL/functions/CLThreshold.h new file mode 100644 index 0000000000..14c05786c1 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLThreshold.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLTHRESHOLD_H__ +#define __ARM_COMPUTE_CLTHRESHOLD_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLThresholdKernel */ +class CLThreshold : public ICLSimpleFunction +{ +public: + /** Initialise the function's source, destination, thresholds and threshold type + * + * @param[in] input First tensor input. Data types supported: U8. + * @param[out] output Output tensor. Data types supported: U8. + * @param[in] threshold Threshold. If upper threshold is specified, this will be used as the lower threshold. + * @param[in] false_value Value to assign when the condition is false. + * @param[in] true_value value to assign when the condition is true. + * @param[in] type Thresholding type. Can either be BINARY or RANGE. + * @param[in] upper Upper threshold. Only used with RANGE thresholding + */ + void configure(const ICLTensor *input, ICLTensor *output, uint8_t threshold, + uint8_t false_value = 0, uint8_t true_value = 0, + ThresholdType type = ThresholdType::BINARY, uint8_t upper = 0); +}; +} +#endif /*__ARM_COMPUTE_CLTHRESHOLD_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLTranspose.h b/arm_compute/runtime/CL/functions/CLTranspose.h new file mode 100644 index 0000000000..9b57fe00a8 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLTranspose.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLTRANSPOSE_H__ +#define __ARM_COMPUTE_CLTRANSPOSE_H__ + +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to transpose a matrix on OpenCL. This function calls the following OpenCL kernel: + * + * -# @ref CLTransposeKernel + * + */ +class CLTranspose : public ICLSimpleFunction +{ +public: + /** Initialise the kernel's inputs and output + * + * @param[in] input Input tensor. Data types supported: U8/S8/U16/S16/F16/U32/S32/F32 + * @param[out] output Output tensor. 
Data type supported: Same as @p input + */ + void configure(const ICLTensor *input, ICLTensor *output); +}; +} + +#endif /* __ARM_COMPUTE_CLTRANSPOSE_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLWarpAffine.h b/arm_compute/runtime/CL/functions/CLWarpAffine.h new file mode 100644 index 0000000000..aeab3f7b22 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLWarpAffine.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLWARPAFFINE_H__ +#define __ARM_COMPUTE_CLWARPAFFINE_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLWarpAffineKernel for AFFINE transformation */ +class CLWarpAffine : public ICLSimpleFunction +{ +public: + /** Initialize the function's source, destination, interpolation policy and border_mode. + * + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor. Data types supported: U8. + * @param[in] matrix The affine matrix. Must be 2x3 of type float. + * @param[in] policy The interpolation type. + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ICLTensor *input, ICLTensor *output, const float *matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0); +}; +} +#endif /*__ARM_COMPUTE_CLWARPAFFINE_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLWarpPerspective.h b/arm_compute/runtime/CL/functions/CLWarpPerspective.h new file mode 100644 index 0000000000..80237017aa --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLWarpPerspective.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLWARPPERSPECTIVE_H__ +#define __ARM_COMPUTE_CLWARPPERSPECTIVE_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +#include + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLWarpPerspectiveKernel for PERSPECTIVE transformation */ +class CLWarpPerspective : public ICLSimpleFunction +{ +public: + /** Initialize the function's source, destination, interpolation policy and border_mode. + * + * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor. Data types supported: U8. + * @param[in] matrix The perspective matrix. Must be 3x3 of type float. + * @param[in] policy The interpolation type. + * @param[in] border_mode Strategy to use for borders. 
+ * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ICLTensor *input, ICLTensor *output, const float *matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0); +}; +} +#endif /*__ARM_COMPUTE_CLWARPPERSPECTIVE_H__ */ diff --git a/arm_compute/runtime/CPP/CPPScheduler.h b/arm_compute/runtime/CPP/CPPScheduler.h new file mode 100644 index 0000000000..7a37e5ec21 --- /dev/null +++ b/arm_compute/runtime/CPP/CPPScheduler.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CPPSCHEDULER_H__ +#define __ARM_COMPUTE_CPPSCHEDULER_H__ + +#include "arm_compute/runtime/IScheduler.h" + +#include + +namespace arm_compute +{ +class Thread; + +/** C++11 implementation of a pool of threads to automatically split a kernel's execution among several threads. */ +class CPPScheduler : public IScheduler +{ +public: + /** Sets the number of threads the scheduler will use to run the kernels. + * + * @param[in] num_threads If set to 0, then the maximum number of threads supported by C++11 will be used, otherwise the number of threads specified. + */ + void set_num_threads(unsigned int num_threads) override; + /** Returns the number of threads that the CPPScheduler has in its pool. + * + * @return Number of threads available in CPPScheduler. + */ + unsigned int num_threads() const override; + /** Access the scheduler singleton + * + * @return The scheduler + */ + static CPPScheduler &get(); + /** Multithread the execution of the passed kernel if possible. + * + * The kernel will run on a single thread if any of these conditions is true: + * - ICPPKernel::is_parallelisable() returns false + * - The scheduler has been initialized with only one thread. + * + * @param[in] kernel Kernel to execute. + * @param[in] split_dimension Dimension along which to split the kernel's execution window. + */ + void schedule(ICPPKernel *kernel, unsigned int split_dimension) override; + +private: + /** Constructor: create a pool of threads. */ + CPPScheduler(); + + unsigned int _num_threads; + std::unique_ptr _threads; +}; +} +#endif /* __ARM_COMPUTE_CPPSCHEDULER_H__ */ diff --git a/arm_compute/runtime/Distribution1D.h b/arm_compute/runtime/Distribution1D.h new file mode 100644 index 0000000000..7080e88075 --- /dev/null +++ b/arm_compute/runtime/Distribution1D.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_DISTRIBUTION1D_H__ +#define __ARM_COMPUTE_DISTRIBUTION1D_H__ + +#include "arm_compute/core/IDistribution1D.h" + +#include +#include +#include + +namespace arm_compute +{ +/** Basic implementation of the 1D distribution interface */ +class Distribution1D : public IDistribution1D +{ +public: + /** Constructor: Creates a 1D Distribution of a consecutive interval [offset, offset + range - 1] + * defined by a start offset and valid range, divided equally into num_bins parts. + * + * @param[in] num_bins The number of bins the distribution is divided in. + * @param[in] offset The start of the values to use. + * @param[in] range The total number of the consecutive values of the distribution interval. 
+ */ + Distribution1D(size_t num_bins, int32_t offset, uint32_t range); + + // Inherited methods overridden: + uint32_t *buffer() const override; + +private: + std::unique_ptr _data; /**< The distribution data. */ +}; +} +#endif /* __ARM_COMPUTE_DISTRIBUTION1D_H__ */ diff --git a/arm_compute/runtime/HOG.h b/arm_compute/runtime/HOG.h new file mode 100644 index 0000000000..70d8034bef --- /dev/null +++ b/arm_compute/runtime/HOG.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_HOG_H__ +#define __ARM_COMPUTE_HOG_H__ + +#include "arm_compute/core/HOGInfo.h" +#include "arm_compute/core/IHOG.h" +#include "arm_compute/core/Types.h" + +#include + +namespace arm_compute +{ +/** CPU implementation of HOG data-object */ +class HOG : public IHOG +{ +public: + /** Default constructor */ + HOG(); + /** Allocate the HOG descriptor using the given HOG's metadata + * + * @param[in] input HOG's metadata used to allocate the HOG descriptor + */ + void init(const HOGInfo &input); + + // Inherited method overridden: + const HOGInfo *info() const override; + float *descriptor() const override; + +private: + HOGInfo _info; + std::unique_ptr _descriptor; +}; +} +#endif /* __ARM_COMPUTE_HOG_H__ */ diff --git a/arm_compute/runtime/IFunction.h b/arm_compute/runtime/IFunction.h new file mode 100644 index 0000000000..a4e7ed15e0 --- /dev/null +++ b/arm_compute/runtime/IFunction.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_IFUNCTION_H__ +#define __ARM_COMPUTE_IFUNCTION_H__ + +namespace arm_compute +{ +/** Base class for all functions */ +class IFunction +{ +public: + /** Run the kernels contained in the function + * + * For NEON kernels: + * - Multi-threading is used for the kernels which are parallelisable. + * - By default std::thread::hardware_concurrency() threads are used. + * + * @note @ref CPPScheduler::set_num_threads() can be used to manually set the number of threads + * + * For OpenCL kernels: + * - All the kernels are enqueued on the queue associated with CLScheduler. + * - The queue is then flushed. + * + * @note The function will not block until the kernels are executed. It is the user's responsibility to wait. + */ + virtual void run() = 0; + /** Destructor + * + */ + virtual ~IFunction() = default; +}; +} +#endif /*__ARM_COMPUTE_IFUNCTION_H__ */ diff --git a/arm_compute/runtime/ILutAllocator.h b/arm_compute/runtime/ILutAllocator.h new file mode 100644 index 0000000000..f23fbd2154 --- /dev/null +++ b/arm_compute/runtime/ILutAllocator.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_ILUTALLOCATOR_H__ +#define __ARM_COMPUTE_ILUTALLOCATOR_H__ + +#include "arm_compute/core/Types.h" + +#include +#include + +namespace arm_compute +{ +/** Basic interface to allocate LUTs' */ +class ILutAllocator +{ +public: + /** Default constructor */ + ILutAllocator(); + /** Default virtual destructor */ + virtual ~ILutAllocator() = default; + /** Allow instances of this class to be move constructed */ + ILutAllocator(ILutAllocator &&) = default; + /** Allow instances of this class to be moved */ + ILutAllocator &operator=(ILutAllocator &&) = default; + /** Allocate an LUT of the requested number of elements and data_type. + * + * @param[in] num_elements Number of elements of the LUT. + * @param[in] data_type Data type of each element. 
+ */ + void init(size_t num_elements, DataType data_type); + /** Returns the total number of elements in the LUT. + * + * @return Total number of elements. + */ + size_t num_elements() const; + /** Returns the type of the LUT. + * + * @return The type of the LUT. + */ + DataType type() const; + /** Returns the total size in bytes of the LUT. + * + * @return Total size of the LUT in bytes. + */ + size_t size() const; + +protected: + /** Interface to be implemented by the child class to allocate the LUT. */ + virtual void allocate() = 0; + /** Interface to be implemented by the child class to lock the memory allocation for the CPU to access. + * + * @return Pointer to a CPU mapping of the memory + */ + virtual uint8_t *lock() = 0; + /** Interface to be implemented by the child class to unlock the memory allocation after the CPU is done accessing it. */ + virtual void unlock() = 0; + +private: + size_t _num_elements; /**< Number of elements allocated */ + DataType _data_type; /**< Data type of LUT elements. */ +}; +} +#endif /* __ARM_COMPUTE_ILUTALLOCATOR_H__ */ diff --git a/arm_compute/runtime/IScheduler.h b/arm_compute/runtime/IScheduler.h new file mode 100644 index 0000000000..39c027c6b7 --- /dev/null +++ b/arm_compute/runtime/IScheduler.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_ISCHEDULER_H__ +#define __ARM_COMPUTE_ISCHEDULER_H__ + +namespace arm_compute +{ +class ICPPKernel; + +/** Scheduler interface to run kernels */ +class IScheduler +{ +public: + /** Destructor. */ + virtual ~IScheduler() = default; + /** Sets the number of threads the scheduler will use to run the kernels. + * + * @param[in] num_threads If set to 0, then one thread per CPU core available on the system will be used, otherwise the number of threads specified. + */ + virtual void set_num_threads(unsigned int num_threads) = 0; + /** Returns the number of threads that the scheduler has in its pool. + * + * @return Number of threads available in the scheduler. + */ + virtual unsigned int num_threads() const = 0; + /** Schedules the execution of the passed kernel, splitting the work along split_dimension when possible. + * + * @param[in] kernel Kernel to execute. + * @param[in] split_dimension Dimension along which to split the kernel's execution window. + */ + virtual void schedule(ICPPKernel *kernel, unsigned int split_dimension) = 0; +}; +} +#endif /* __ARM_COMPUTE_ISCHEDULER_H__ */ diff --git a/arm_compute/runtime/ITensorAllocator.h b/arm_compute/runtime/ITensorAllocator.h new file mode 100644 index 0000000000..6103e436bc --- /dev/null +++ b/arm_compute/runtime/ITensorAllocator.h @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_ITENSORALLOCATOR_H__ +#define __ARM_COMPUTE_ITENSORALLOCATOR_H__ + +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" + +#include + +namespace arm_compute +{ +/** Interface to allocate tensors */ +class ITensorAllocator +{ +public: + /** Default constructor. */ + ITensorAllocator(); + /** Allow instances of this class to be copy constructed */ + ITensorAllocator(const ITensorAllocator &) = default; + /** Allow instances of this class to be copied */ + ITensorAllocator &operator=(const ITensorAllocator &) = default; + /** Allow instances of this class to be move constructed */ + ITensorAllocator(ITensorAllocator &&) = default; + /** Allow instances of this class to be moved */ + ITensorAllocator &operator=(ITensorAllocator &&) = default; + /** Default virtual destructor. 
*/ + virtual ~ITensorAllocator() = default; + + /** Initialize a tensor based on the passed @ref TensorInfo. + * + * @param[in] input TensorInfo object containing the description of the tensor to initialize. + */ + void init(const TensorInfo &input); + /** Return a reference to the tensor's metadata + * + * @return Reference to the tensor's metadata. + */ + TensorInfo &info(); + /** Return a constant reference to the tensor's metadata + * + * @return Constant reference to the tensor's metadata. + */ + const TensorInfo &info() const; + + /** Interface to be implemented by the child class to allocate the tensor. + * + * @note The child is expected to use the TensorInfo to get the size of the memory allocation. + * @warning The tensor must not already be allocated. Otherwise calling the function will fail. + */ + virtual void allocate() = 0; + + /** Interface to be implemented by the child class to free the allocated tensor. + * + * @warning The tensor must have been allocated previously. Otherwise calling the function will fail. + */ + virtual void free() = 0; + +protected: + /** Interface to be implemented by the child class to lock the memory allocation for the CPU to access. + * + * @return Pointer to a CPU mapping of the memory + */ + virtual uint8_t *lock() = 0; + /** Interface to be implemented by the child class to unlock the memory allocation after the CPU is done accessing it. */ + virtual void unlock() = 0; + +private: + TensorInfo _info; /**< Tensor's metadata. */ +}; +} +#endif /*__ARM_COMPUTE_ITENSORALLOCATOR_H__ */ diff --git a/arm_compute/runtime/Lut.h b/arm_compute/runtime/Lut.h new file mode 100644 index 0000000000..87431feee4 --- /dev/null +++ b/arm_compute/runtime/Lut.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_LUT_H__ +#define __ARM_COMPUTE_LUT_H__ + +#include "arm_compute/core/ILut.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/LutAllocator.h" + +#include +#include + +namespace arm_compute +{ +class ILutAllocator; + +/** Basic implementation of the LUT interface */ +class Lut : public ILut +{ +public: + /** Constructor */ + Lut(); + /** Constructor: initializes a LUT which can contain num_values values of data_type type. + * + * @param[in] num_elements Number of elements of the LUT. + * @param[in] data_type Data type of each element. 
+ */ + Lut(size_t num_elements, DataType data_type); + /** Return a pointer to the lut's allocator + * + * @return A pointer to the lut's allocator + */ + ILutAllocator *allocator(); + + // Inherited methods overridden: + size_t num_elements() const override; + uint32_t index_offset() const override; + size_t size_in_bytes() const override; + DataType type() const override; + uint8_t *buffer() const override; + void clear() override; + +private: + LutAllocator _allocator; /**< Instance of the basic CPU allocator.*/ +}; +} +#endif /* __ARM_COMPUTE_LUT_H__ */ diff --git a/arm_compute/runtime/LutAllocator.h b/arm_compute/runtime/LutAllocator.h new file mode 100644 index 0000000000..76b596bfa0 --- /dev/null +++ b/arm_compute/runtime/LutAllocator.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_LUTALLOCATOR_H__ +#define __ARM_COMPUTE_LUTALLOCATOR_H__ + +#include "arm_compute/runtime/ILutAllocator.h" + +#include <cstdint> +#include <memory> + +namespace arm_compute +{ +/** Basic implementation of a CPU memory LUT allocator. */ +class LutAllocator : public ILutAllocator +{ +public: + /** Default constructor. */ + LutAllocator(); + /** Return a pointer to the allocated data. */ + uint8_t *data() const; + +protected: + /** Allocate num_elements() * sizeof(type()) of CPU memory. */ + void allocate() override; + /** No-op for CPU memory + * + * @return A pointer to the beginning of the look up table's allocation. + */ + uint8_t *lock() override; + /** No-op for CPU memory. */ + void unlock() override; + +private: + std::unique_ptr<uint8_t[]> _buffer; /**< CPU memory allocation. */ +}; +} +#endif /* __ARM_COMPUTE_LUTALLOCATOR_H__ */ diff --git a/arm_compute/runtime/MultiHOG.h b/arm_compute/runtime/MultiHOG.h new file mode 100644 index 0000000000..32bad70738 --- /dev/null +++ b/arm_compute/runtime/MultiHOG.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_MULTIHOG_H__ +#define __ARM_COMPUTE_MULTIHOG_H__ + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/IMultiHOG.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/HOG.h" + +#include <memory> + +namespace arm_compute +{ +/** CPU implementation of multi HOG data-object */ +class MultiHOG : public IMultiHOG +{ +public: + /** Constructor + * + * @param[in] num_models Number of HOG data objects to contain + * + */ + MultiHOG(size_t num_models); + + // Inherited methods overridden: + size_t num_models() const override; + IHOG *model(size_t index) override; + const IHOG *model(size_t index) const override; + +private: + size_t _num_models; + std::unique_ptr<HOG[]> _model; +}; +} + +#endif /* __ARM_COMPUTE_MULTIHOG_H__ */ diff --git a/arm_compute/runtime/MultiImage.h b/arm_compute/runtime/MultiImage.h new file mode 100644 index 0000000000..917e586ef8 --- /dev/null +++ b/arm_compute/runtime/MultiImage.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_MULTIIMAGE_H__ +#define __ARM_COMPUTE_MULTIIMAGE_H__ + +#include "arm_compute/core/IMultiImage.h" +#include "arm_compute/core/MultiImageInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/Tensor.h" + +#include <array> + +namespace arm_compute +{ +class Coordinates; +class ITensor; +using IImage = ITensor; + +/** Basic implementation of the multi-planar image interface */ +class MultiImage : public IMultiImage +{ +public: + /** Constructor */ + MultiImage(); + /** Allocate the multi-planar image + * + * @param[in] width Width of the whole image + * @param[in] height Height of the whole image + * @param[in] format Format of the whole image + */ + void init(unsigned int width, unsigned int height, Format format); + /** Allocate the multi-planar image + * + * @note Uses conservative padding strategy which fits all kernels. + * + * @param[in] width Width of the whole image + * @param[in] height Height of the whole image + * @param[in] format Format of the whole image + */ + void init_auto_padding(unsigned int width, unsigned int height, Format format); + /** Allocate a previously initialised multi image + * + * @note The multi image must not already be allocated when calling this function. + * + **/ + void allocate(); + /** Create a subimage from an existing MultiImage. + * + * @param[in] image Image to use backing memory from + * @param[in] coords Starting coordinates of the new image.
Should be within the parent image sizes + * @param[in] width The width of the subimage + * @param[in] height The height of the subimage + */ + void create_subimage(MultiImage *image, const Coordinates &coords, unsigned int width, unsigned int height); + + // Inherited methods overridden: + const MultiImageInfo *info() const override; + Image *plane(unsigned int index) override; + const Image *plane(unsigned int index) const override; + +private: + /** Init the multi-planar image + * + * @param[in] width Width of the whole image + * @param[in] height Height of the whole image + * @param[in] format Format of the whole image + * @param[in] auto_padding Specifies whether the image uses auto padding + */ + void internal_init(unsigned int width, unsigned int height, Format format, bool auto_padding); + + MultiImageInfo _info; /**< Instance of the multi-planar image's meta data */ + std::array<Image, 3> _plane; /**< Instance Image to hold the planar's information */ +}; +} +#endif /*__ARM_COMPUTE_MULTIIMAGE_H__ */ diff --git a/arm_compute/runtime/NEON/INESimpleFunction.h b/arm_compute/runtime/NEON/INESimpleFunction.h new file mode 100644 index 0000000000..6e000d8fd8 --- /dev/null +++ b/arm_compute/runtime/NEON/INESimpleFunction.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_INESIMPLEFUNCTION_H__ +#define __ARM_COMPUTE_INESIMPLEFUNCTION_H__ + +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "arm_compute/runtime/IFunction.h" + +#include + +namespace arm_compute +{ +/** Basic interface for functions which have a single NEON kernel */ +class INESimpleFunction : public IFunction +{ +public: + /** Constructor */ + INESimpleFunction(); + + // Inherited methods overridden: + void run() override final; + +protected: + std::unique_ptr _kernel; /**< Kernel to run */ + NEFillBorderKernel _border_handler; /**< Kernel to handle image borders */ +}; +} +#endif /*__ARM_COMPUTE_INESIMPLEFUNCTION_H__ */ diff --git a/arm_compute/runtime/NEON/NEFunctions.h b/arm_compute/runtime/NEON/NEFunctions.h new file mode 100644 index 0000000000..daf76f3a87 --- /dev/null +++ b/arm_compute/runtime/NEON/NEFunctions.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEFUNCTIONS_H__ +#define __ARM_COMPUTE_NEFUNCTIONS_H__ + +/* Header regrouping all the NEON functions */ +#include "arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h" +#include "arm_compute/runtime/NEON/functions/NEAccumulate.h" +#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" +#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" +#include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h" +#include "arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h" +#include "arm_compute/runtime/NEON/functions/NEBitwiseAnd.h" +#include "arm_compute/runtime/NEON/functions/NEBitwiseNot.h" +#include "arm_compute/runtime/NEON/functions/NEBitwiseOr.h" +#include "arm_compute/runtime/NEON/functions/NEBitwiseXor.h" +#include "arm_compute/runtime/NEON/functions/NEBox3x3.h" +#include "arm_compute/runtime/NEON/functions/NECannyEdge.h" +#include "arm_compute/runtime/NEON/functions/NEChannelCombine.h" +#include "arm_compute/runtime/NEON/functions/NEChannelExtract.h" +#include "arm_compute/runtime/NEON/functions/NEColorConvert.h" +#include "arm_compute/runtime/NEON/functions/NEConvolution.h" +#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h" +#include "arm_compute/runtime/NEON/functions/NEDepthConcatenate.h" +#include "arm_compute/runtime/NEON/functions/NEDepthConvert.h" +#include "arm_compute/runtime/NEON/functions/NEDerivative.h" +#include "arm_compute/runtime/NEON/functions/NEDilate.h" +#include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h" +#include "arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h" +#include "arm_compute/runtime/NEON/functions/NEErode.h" +#include "arm_compute/runtime/NEON/functions/NEFastCorners.h" +#include "arm_compute/runtime/NEON/functions/NEFillBorder.h" +#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h" +#include "arm_compute/runtime/NEON/functions/NEGEMM.h" +#include 
"arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h" +#include "arm_compute/runtime/NEON/functions/NEGEMMLowp.h" +#include "arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h" +#include "arm_compute/runtime/NEON/functions/NEGaussian3x3.h" +#include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h" +#include "arm_compute/runtime/NEON/functions/NEGaussianPyramid.h" +#include "arm_compute/runtime/NEON/functions/NEHOGDescriptor.h" +#include "arm_compute/runtime/NEON/functions/NEHOGDetector.h" +#include "arm_compute/runtime/NEON/functions/NEHOGGradient.h" +#include "arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h" +#include "arm_compute/runtime/NEON/functions/NEHarrisCorners.h" +#include "arm_compute/runtime/NEON/functions/NEHistogram.h" +#include "arm_compute/runtime/NEON/functions/NEIntegralImage.h" +#include "arm_compute/runtime/NEON/functions/NELaplacianPyramid.h" +#include "arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h" +#include "arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h" +#include "arm_compute/runtime/NEON/functions/NEMagnitude.h" +#include "arm_compute/runtime/NEON/functions/NEMeanStdDev.h" +#include "arm_compute/runtime/NEON/functions/NEMedian3x3.h" +#include "arm_compute/runtime/NEON/functions/NEMinMaxLocation.h" +#include "arm_compute/runtime/NEON/functions/NENonLinearFilter.h" +#include "arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h" +#include "arm_compute/runtime/NEON/functions/NENormalizationLayer.h" +#include "arm_compute/runtime/NEON/functions/NEOpticalFlow.h" +#include "arm_compute/runtime/NEON/functions/NEPhase.h" +#include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h" +#include "arm_compute/runtime/NEON/functions/NEPoolingLayer.h" +#include "arm_compute/runtime/NEON/functions/NERemap.h" +#include "arm_compute/runtime/NEON/functions/NEScale.h" +#include "arm_compute/runtime/NEON/functions/NEScharr3x3.h" +#include "arm_compute/runtime/NEON/functions/NESobel3x3.h" 
+#include "arm_compute/runtime/NEON/functions/NESobel5x5.h" +#include "arm_compute/runtime/NEON/functions/NESobel7x7.h" +#include "arm_compute/runtime/NEON/functions/NESoftmaxLayer.h" +#include "arm_compute/runtime/NEON/functions/NETableLookup.h" +#include "arm_compute/runtime/NEON/functions/NEThreshold.h" +#include "arm_compute/runtime/NEON/functions/NETranspose.h" +#include "arm_compute/runtime/NEON/functions/NEWarpAffine.h" +#include "arm_compute/runtime/NEON/functions/NEWarpPerspective.h" + +#endif /* __ARM_COMPUTE_NEFUNCTIONS_H__ */ diff --git a/arm_compute/runtime/NEON/NEScheduler.h b/arm_compute/runtime/NEON/NEScheduler.h new file mode 100644 index 0000000000..94c82b2f03 --- /dev/null +++ b/arm_compute/runtime/NEON/NEScheduler.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NESCHEDULER_H__ +#define __ARM_COMPUTE_NESCHEDULER_H__ + +#include "arm_compute/runtime/Scheduler.h" + +namespace arm_compute +{ +using NEScheduler = Scheduler; +} +#endif /*__ARM_COMPUTE_NESCHEDULER_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h b/arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h new file mode 100644 index 0000000000..266a27586a --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEABSOLUTEDIFFERENCE_H__ +#define __ARM_COMPUTE_NEABSOLUTEDIFFERENCE_H__ + +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run @ref NEAbsoluteDifferenceKernel + * + * @note The image data type for the inputs must be U8 or S16 + * @note The function calculates the absolute difference also when the 2 inputs have different image data types + */ +class NEAbsoluteDifference : public INESimpleFunction +{ +public: + /** Set the inputs and output images + * + * @param[in] input1 Source tensor. Data types supported: U8/S16. + * @param[in] input2 Source tensor. Data types supported: U8/S16. + * @param[out] output Destination tensor. Data types supported: U8/S16. + */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output); +}; +} +#endif /* __ARM_COMPUTE_NEABSOLUTEDIFFERENCE_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEAccumulate.h b/arm_compute/runtime/NEON/functions/NEAccumulate.h new file mode 100644 index 0000000000..de532c37a0 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEAccumulate.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEACCUMULATE_H__ +#define __ARM_COMPUTE_NEACCUMULATE_H__ + +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +#include <cstdint> + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run @ref NEAccumulateKernel */ +class NEAccumulate : public INESimpleFunction +{ +public: + /** Set the input and accumulation tensors + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output Destination tensor. Data type supported: S16. + */ + void configure(const ITensor *input, ITensor *output); +}; + +/** Basic function to run @ref NEAccumulateWeightedKernel */ +class NEAccumulateWeighted : public INESimpleFunction +{ +public: + /** Set the input and accumulation tensors, and the scale value + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[in] alpha The input scalar value with a value in the range of [0, 1.0] + * @param[in,out] output Accumulated tensor. Data type supported: U8. + * @param[in] use_fp16 (Optional) If true the FP16 kernels will be used. If false F32 kernels are used. + */ + void configure(const ITensor *input, float alpha, ITensor *output, bool use_fp16 = false); +}; + +/** Basic function to run @ref NEAccumulateSquaredKernel */ +class NEAccumulateSquared : public INESimpleFunction +{ +public: + /** Set the input and accumulation tensors and the shift value. + * + * @param[in] input Source tensor. Data type supported: U8.
+ * @param[in] shift The input with a value in the range of [0, 15] + * @param[in,out] output Accumulated tensor. Data type supported: S16. + */ + void configure(const ITensor *input, uint32_t shift, ITensor *output); +}; +} +#endif /*__ARM_COMPUTE_NEACCUMULATE_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEActivationLayer.h b/arm_compute/runtime/NEON/functions/NEActivationLayer.h new file mode 100644 index 0000000000..35366e16fb --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEActivationLayer.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +#ifndef __ARM_COMPUTE_NEACTIVATIONLAYER_H__ +#define __ARM_COMPUTE_NEACTIVATIONLAYER_H__ + +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run @ref NEActivationLayerKernel + * + * @note The function simulates an activation layer with the specified activation function. + */ +class NEActivationLayer : public INESimpleFunction +{ +public: + /** Set the input and output tensor. + * + * @param[in] input Source tensor. Data type supported: QS8/F32. + * @param[out] output Destination tensor. Data type supported: same as @p input + * @param[in] activation_info Activation layer parameters. + */ + void configure(const ITensor *input, ITensor *output, ActivationLayerInfo activation_info); +}; +} +#endif /* __ARM_COMPUTE_NEACTIVATIONLAYER_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h new file mode 100644 index 0000000000..8e34e983c7 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEARITHMETICADDITION_H__ +#define __ARM_COMPUTE_NEARITHMETICADDITION_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run @ref NEArithmeticAdditionKernel */ +class NEArithmeticAddition : public INESimpleFunction +{ +public: + /** Initialise the kernel's inputs, output and conversion policy. + * + * @param[in] input1 First tensor input. Data types supported: U8/S16. + * @param[in] input2 Second tensor input. Data types supported: U8/S16. + * @param[out] output Output tensor. Data types supported: U8/S16. + * @param[in] policy Policy to use to handle overflow. + */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy); +}; +} +#endif /*__ARM_COMPUTE_NEARITHMETICADDITION_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h b/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h new file mode 100644 index 0000000000..841b5912b9 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEARITHMETICSUBTRACTION_H__ +#define __ARM_COMPUTE_NEARITHMETICSUBTRACTION_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run @ref NEArithmeticSubtractionKernel */ +class NEArithmeticSubtraction : public INESimpleFunction +{ +public: + /** Initialise the kernel's inputs, output and conversion policy. + * + * @param[in] input1 First tensor input. Data types supported: U8/S16. + * @param[in] input2 Second tensor input. Data types supported: U8/S16. + * @param[out] output Output tensor. Data types supported: U8/S16. + * @param[in] policy Policy to use to handle overflow. 
+ */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy); +}; +} +#endif /* __ARM_COMPUTE_NEARITHMETICSUBTRACTION_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h b/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h new file mode 100644 index 0000000000..b0b5c122cb --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEBATCHNORMALIZATIONLAYER_H__ +#define __ARM_COMPUTE_NEBATCHNORMALIZATIONLAYER_H__ + +#include "arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run @ref NEBatchNormalizationLayerKernel and simulate a batch normalization layer. + * + * Batch normalization is calculated by: + * @f[ out_i = \gamma * (\frac{in_i - \mu_{B}}{\sqrt{\sigma^2_{B} + \epsilon}}) + \beta \equiv BN_{\gamma,\beta}(in_i) @f] + * + */ +class NEBatchNormalizationLayer : public IFunction +{ +public: + /** Default constructor */ + NEBatchNormalizationLayer(); + /** Set the input and output tensors. + * + * @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, FM]. + * The rest are optional and used for representing batches. Data types supported: QS8/F32. + * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] beta Beta values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] gamma Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] epsilon Small value to avoid division by zero. + * @param[out] output Destination tensor. Output will have the same number of dimensions as input.
Data type supported: same as @p input + */ + void configure(const ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma, float epsilon); + + // Inherited methods overridden: + void run() override; + +private: + NEBatchNormalizationLayerKernel _norm_kernel; /**< Batch normalization layer kernel */ +}; +} +#endif /* __ARM_COMPUTE_NEBATCHNORMALIZATIONLAYER_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h b/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h new file mode 100644 index 0000000000..0250293e97 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEBITWISEAND_H__ +#define __ARM_COMPUTE_NEBITWISEAND_H__ + +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run @ref NEBitwiseAndKernel */ +class NEBitwiseAnd : public INESimpleFunction +{ +public: + /** Initialise the kernel's inputs and output + * + * @param[in] input1 First tensor input. Data type supported: U8. + * @param[in] input2 Second tensor input. Data type supported: U8. + * @param[out] output Output tensor. Data type supported: U8. + */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output); +}; +} +#endif /* __ARM_COMPUTE_NEBITWISEAND_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseNot.h b/arm_compute/runtime/NEON/functions/NEBitwiseNot.h new file mode 100644 index 0000000000..62c08ffcf9 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEBitwiseNot.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEBITWISENOT_H__ +#define __ARM_COMPUTE_NEBITWISENOT_H__ + +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run @ref NEBitwiseNotKernel */ +class NEBitwiseNot : public INESimpleFunction +{ +public: + /** Initialise the kernel's input and output + * + * @param[in] input Input tensor. Data type supported: U8. + * @param[out] output Output tensor. Data type supported: U8. + */ + void configure(const ITensor *input, ITensor *output); +}; +} +#endif /* __ARM_COMPUTE_NEBITWISENOT_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseOr.h b/arm_compute/runtime/NEON/functions/NEBitwiseOr.h new file mode 100644 index 0000000000..1c9a2f9d2e --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEBitwiseOr.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEBITWISEOR_H__ +#define __ARM_COMPUTE_NEBITWISEOR_H__ + +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run @ref NEBitwiseOrKernel */ +class NEBitwiseOr : public INESimpleFunction +{ +public: + /** Initialise the kernel's inputs and output + * + * @param[in] input1 First tensor input. Data type supported: U8. + * @param[in] input2 Second tensor input. Data type supported: U8. + * @param[out] output Output tensor. Data type supported: U8. + */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output); +}; +} +#endif /* __ARM_COMPUTE_NEBITWISEOR_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseXor.h b/arm_compute/runtime/NEON/functions/NEBitwiseXor.h new file mode 100644 index 0000000000..4690f0a4e3 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEBitwiseXor.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEBITWISEXOR_H__ +#define __ARM_COMPUTE_NEBITWISEXOR_H__ + +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run @ref NEBitwiseXorKernel */ +class NEBitwiseXor : public INESimpleFunction +{ +public: + /** Initialise the kernel's inputs and output + * + * @param[in] input1 First tensor input. Data type supported: U8. + * @param[in] input2 Second tensor input. Data type supported: U8. + * @param[out] output Output tensor. Data type supported: U8. 
+ */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output); +}; +} +#endif /* __ARM_COMPUTE_NEBITWISEXOR_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEBox3x3.h b/arm_compute/runtime/NEON/functions/NEBox3x3.h new file mode 100644 index 0000000000..2b5440a74c --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEBox3x3.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEBOX3x3_H__ +#define __ARM_COMPUTE_NEBOX3x3_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +#include + +namespace arm_compute +{ +class ITensor; + +/** Basic function to execute box filter 3x3. 
This function calls the following NEON kernels: + * + * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref NEBox3x3Kernel + * + */ +class NEBox3x3 : public INESimpleFunction +{ +public: + /** Initialise the function's input, output and border mode. + * + * @note The border handler is run on the input tensor. + * + * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor, Data type supported: U8. + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + * @param[in] use_fp16 (Optional) If true the FP16 kernels will be used. If false F32 kernels are used. + */ + void configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value = 0, bool use_fp16 = false); +}; +} +#endif /*__ARM_COMPUTE_NEBOX3x3_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NECannyEdge.h b/arm_compute/runtime/NEON/functions/NECannyEdge.h new file mode 100644 index 0000000000..fbf2d90740 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NECannyEdge.h @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NECANNYEDGE_H__ +#define __ARM_COMPUTE_NECANNYEDGE_H__ + +#include "arm_compute/core/NEON/kernels/NECannyEdgeKernel.h" +#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/Tensor.h" + +#include +#include + +namespace arm_compute +{ +class ITensor; + +/** Basic function to execute canny edge on NEON. This function calls the following NEON kernels and functions: + * + * -# @ref NEFillBorderKernel (if border_mode == REPLICATE or border_mode == CONSTANT) + * -# @ref NESobel3x3 (if gradient_size == 3) or + * @ref NESobel5x5 (if gradient_size == 5) or + * @ref NESobel7x7 (if gradient_size == 7) + * -# @ref NEGradientKernel + * -# @ref NEEdgeNonMaxSuppressionKernel + * -# @ref NEEdgeTraceKernel + * + */ +class NECannyEdge : public IFunction +{ +public: + /** Constructor + * + * Initialize Sobel kernel to nullptr. + */ + NECannyEdge(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NECannyEdge(const NECannyEdge &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NECannyEdge &operator=(const NECannyEdge &) = delete; + /** Initialise the function's source, destination, thresholds, gradient size, normalization type and border mode. + * + * @param[in, out] input Source tensor. Data type supported: U8. 
(Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor. Data type supported: U8. + * @param[in] upper_thr Upper threshold used for the hysteresis. + * @param[in] lower_thr Lower threshold used for the hysteresis. + * @param[in] gradient_size Gradient size (3, 5 or 7) + * @param[in] norm_type Normalization type. If 1, L1-Norm otherwise L2-Norm + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + * @param[in] use_fp16 (Optional) If true the FP16 kernels will be used. If false F32 kernels are used. + * + */ + void configure(ITensor *input, ITensor *output, int32_t upper_thr, int32_t lower_thr, int32_t gradient_size, int32_t norm_type, BorderMode border_mode, uint8_t constant_border_value = 0, + bool use_fp16 = false); + + // Inherited methods overridden: + void run() override; + +private: + std::unique_ptr _sobel; /**< Pointer to Sobel kernel */ + std::unique_ptr _gradient; /**< Gradient kernel */ + NEEdgeNonMaxSuppressionKernel _non_max_suppr; /**< Non-Maxima suppression kernel */ + NEEdgeTraceKernel _edge_trace; /**< Edge tracing kernel */ + NEFillBorderKernel _border_mag_gradient; /**< Fill border on magnitude tensor kernel */ + NEFillBorderKernel _border_edge_trace; /**< Fill border before edge trace */ + Tensor _gx; /**< Source tensor - Gx component */ + Tensor _gy; /**< Source tensor - Gy component */ + Tensor _magnitude; /**< Source tensor - Magnitude */ + Tensor _phase; /**< Source tensor - Phase */ + Tensor _nonmax; /**< Source tensor - Non-Maxima suppressed */ + ITensor *_output; /**< Output tensor provided by the user.
*/ +}; +} +#endif /* __ARM_COMPUTE_NECANNYEDGE_H */ diff --git a/arm_compute/runtime/NEON/functions/NEChannelCombine.h b/arm_compute/runtime/NEON/functions/NEChannelCombine.h new file mode 100644 index 0000000000..7133553e1d --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEChannelCombine.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NECHANNELCOMBINE_H__ +#define __ARM_COMPUTE_NECHANNELCOMBINE_H__ + +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +namespace arm_compute +{ +class IMultiImage; +class ITensor; +using IImage = ITensor; + +/**Basic function to run @ref NEChannelCombineKernel to perform channel combination. */ +class NEChannelCombine : public INESimpleFunction +{ +public: + /** Initialize function's inputs and outputs. + * + * @param[in] plane0 The 2D plane that forms channel 0. 
Data type supported: U8 + * @param[in] plane1 The 2D plane that forms channel 1. Data type supported: U8 + * @param[in] plane2 The 2D plane that forms channel 2. Data type supported: U8 + * @param[in] plane3 The 2D plane that forms channel 3. Data type supported: U8 + * @param[out] output The single planar output tensor. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422 + */ + void configure(const ITensor *plane0, const ITensor *plane1, const ITensor *plane2, const ITensor *plane3, ITensor *output); + /** Initialize function's inputs and outputs. + * + * @param[in] plane0 The 2D plane that forms channel 0. Data type supported: U8 + * @param[in] plane1 The 2D plane that forms channel 1. Data type supported: U8 + * @param[in] plane2 The 2D plane that forms channel 2. Data type supported: U8 + * @param[out] output The multi planar output image. Formats supported: NV12/NV21/IYUV/YUV444 + */ + void configure(const IImage *plane0, const IImage *plane1, const IImage *plane2, IMultiImage *output); +}; +} +#endif /*__ARM_COMPUTE_NECHANNELCOMBINE_H__*/ diff --git a/arm_compute/runtime/NEON/functions/NEChannelExtract.h b/arm_compute/runtime/NEON/functions/NEChannelExtract.h new file mode 100644 index 0000000000..5e46eef3a6 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEChannelExtract.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NECHANNELEXTRACT_H__ +#define __ARM_COMPUTE_NECHANNELEXTRACT_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +namespace arm_compute +{ +class IMultiImage; +class ITensor; +using IImage = ITensor; + +/**Basic function to run @ref NEChannelExtractKernel to perform channel extraction. */ +class NEChannelExtract : public INESimpleFunction +{ +public: + /** Initialize the function's source, destination + * + * @param[in] input The input tensor to extract the channel from. Formats supported: Any single planar. + * @param[in] channel The channel to extract. + * @param[out] output The extracted channel. 
Format supported: U8 + */ + void configure(const ITensor *input, Channel channel, ITensor *output); + /** Initialize the function's source, destination + * + * @param[in] input The multi-planar input image to extract channel from. + * @param[in] channel The channel to extract. + * @param[out] output The extracted channel. Format supported: U8 + */ + void configure(const IMultiImage *input, Channel channel, IImage *output); +}; +} +#endif /*__ARM_COMPUTE_NECHANNELEXTRACT_H__*/ diff --git a/arm_compute/runtime/NEON/functions/NEColorConvert.h b/arm_compute/runtime/NEON/functions/NEColorConvert.h new file mode 100644 index 0000000000..2997778ed5 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEColorConvert.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NECOLORCONVERT_H__ +#define __ARM_COMPUTE_NECOLORCONVERT_H__ + +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +namespace arm_compute +{ +class ITensor; +class IMultiImage; +using IImage = ITensor; + +/**Basic function to run @ref NEColorConvertKernel to perform color conversion */ +class NEColorConvert : public INESimpleFunction +{ +public: + /** Initialize the function's source, destination + * + * @param[in] input The input single-planar tensor from which to convert + * @param[in] output The converted single-planar output tensor + */ + void configure(const ITensor *input, ITensor *output); + /** Initialize the function's source, destination + * + * @param[in] input The multi-planar input image from which to convert + * @param[in] output The converted single-planar output image + */ + void configure(const IMultiImage *input, IImage *output); + /** Initialize the function's source, destination + * + * @param[in] input The single-planar input image from which to convert + * @param[in] output The converted multi-planar output image + */ + void configure(const IImage *input, IMultiImage *output); + /** Initialize the function's source, destination + * + * @param[in] input The multi-planar input image from which to convert + * @param[in] output The converted multi-planar output image + */ + void configure(const IMultiImage *input, IMultiImage *output); +}; +} +#endif /*__ARM_COMPUTE_NECOLORCONVERT_H__*/ diff --git a/arm_compute/runtime/NEON/functions/NEConvolution.h b/arm_compute/runtime/NEON/functions/NEConvolution.h new file mode 100644 index 0000000000..1704d9fa94 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEConvolution.h @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NECONVOLUTION_H__ +#define __ARM_COMPUTE_NECONVOLUTION_H__ + +#include "arm_compute/core/NEON/kernels/NEConvolutionKernel.h" +#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/Tensor.h" + +#include + +namespace arm_compute +{ +class ITensor; + +/** Basic function to execute convolution of size 3x3. This function calls the following NEON kernels: + * + * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref NEConvolution3x3Kernel + * + */ +class NEConvolution3x3 : public INESimpleFunction +{ +public: + /** Initialize the function's source, destination, conv and border_mode. 
+ * + * @param[in,out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor, Data types supported: U8/S16. + * @param[in] conv Matrix_size x matrix_size S16 coefficients structured as a row-major 2D array in a linear buffer. + * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0); +}; + +/** Basic function to execute convolution of size 5x5, 7x7, 9x9. This function calls the following NEON kernels: + * + * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref NEConvolutionKernel or
+ * @ref NESeparableConvolutionHorKernel and @ref NESeparableConvolutionVertKernel (if convolution matrix is separable) + * + */ +template +class NEConvolutionSquare : public IFunction +{ +public: + /** Default constructor */ + NEConvolutionSquare(); + /** Initialize the function's source, destination, conv and border_mode. + * + * @param[in,out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor, Data types supported: U8 or S16. + * @param[in] conv matrix_size x matrix_size S16 coefficients structured as a row-major 2D array in a linear buffer. + * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0); + + // Inherited methods overridden: + void run() override; + +private: + Tensor _tmp; /**< temporary buffer for output of horizontal pass */ + bool _is_separable; /**< true if the convolution can be separated */ + NESeparableConvolutionHorKernel _kernel_hor; /**< kernel for horizontal pass of separated convolution */ + NESeparableConvolutionVertKernel _kernel_vert; /**< kernel for vertical pass of separated convolution */ + NEConvolutionKernel _kernel; /**< kernel for non-separated convolution **/ + NEFillBorderKernel _border_handler; /**< kernel for border handling */ +}; + +/** Basic function to run 5x5 convolution. */ +using NEConvolution5x5 = NEConvolutionSquare<5>; +/** Basic function to run 7x7 convolution. */ +using NEConvolution7x7 = NEConvolutionSquare<7>; +/** Basic function to run 9x9 convolution. 
*/ +using NEConvolution9x9 = NEConvolutionSquare<9>; + +/** Basic function to execute non-square convolution. This function calls the following NEON kernels: + * + * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref NEConvolutionRectangleKernel or
+ * + * @note Convolution rectangle should have dimensions of 3, 5, 7, 9 + */ +class NEConvolutionRectangle : public INESimpleFunction +{ +public: + /** Initialize the function's source, destination, conv and border_mode. + * + * @param[in,out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor, Data types supported: U8 or S16. + * @param[in] conv Matrix_size x matrix_size S16 coefficients structured as a row-major 2D array in a linear buffer. + * @param[in] rows Rows of convolution kernel. + * @param[in] cols Columns of convolution kernel. + * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t rows, uint32_t cols, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0); +}; +} +#endif /*__ARM_COMPUTE_NECONVOLUTION_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h new file mode 100644 index 0000000000..a8fff8d047 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NECONVOLUTIONLAYER_H__ +#define __ARM_COMPUTE_NECONVOLUTIONLAYER_H__ + +#include "arm_compute/runtime/IFunction.h" + +#include "arm_compute/core/NEON/kernels/NECol2ImKernel.h" +#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" +#include "arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" +#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" +#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h" +#include "arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/Tensor.h" + +namespace arm_compute +{ +class ITensor; + +/** Function to reshape and perform 1xW transposition on the weights. 
This function calls the following kernels: + * -# @ref NEWeightsReshapeKernel + * -# @ref NEGEMMTranspose1xWKernel (executed in case GEMM is required for the operation) + */ +class NEConvolutionLayerReshapeWeights : public IFunction +{ +public: + /** Constructor */ + NEConvolutionLayerReshapeWeights(); + /** Set the input and output tensors. + * + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: QS8/F32. + * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights. + * @param[out] output Destination tensor. Data types supported: Same as @p weights. + * @param[in] transpose1xW True if the weights are to undergo a 1xW transposition after reshaping (in case of GEMM operation), false otherwise. + * Data types supported: Same as @p weights. + */ + void configure(const ITensor *weights, const ITensor *biases, ITensor *output, bool transpose1xW); + // Inherited methods overridden: + void run() override; + +private: + NEWeightsReshapeKernel _weights_reshape_kernel; + NEGEMMTranspose1xWKernel _weights_transposed_kernel; + Tensor _weights_reshaped; + bool _transpose1xW; +}; + +/** Basic function to simulate a convolution layer. This function calls the following NEON kernels: + * -# @ref NEWeightsReshapeKernel (executed only once for each configuration) + * -# @ref NEIm2ColKernel + * -# @ref NEGEMMInterleave4x4Kernel (executed only in case GEMM is required for the operation) + * -# @ref NEGEMMMatrixMultiplyKernel + * -# @ref NECol2ImKernel + */ +class NEConvolutionLayer : public IFunction +{ +public: + /** Constructor */ + NEConvolutionLayer(); + /** Set the input and output tensors. + * + * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. + * Data types supported: QS8/F32. 
+ * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input. + * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. + * Data types supported: Same as @p input. + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights + * tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input. + */ + void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo()); + // Inherited methods overridden: + void run() override; + +private: + NEIm2ColKernel _input_im2col_kernel; + NEGEMMInterleave4x4Kernel _input_interleave_kernel; + NEConvolutionLayerReshapeWeights _reshape_weights; + NEGEMMMatrixMultiplyKernel _mm_kernel; + NECol2ImKernel _output_col2im_kernel; + Tensor _input_im2col_reshaped; + Tensor _input_interleaved_reshaped; + Tensor _weights_reshaped; + Tensor _gemm_output; + bool _has_bias; + bool _is_fully_connected_convolution; + bool _are_weights_reshaped; +}; +} +#endif /* __ARM_COMPUTE_NECONVOLUTIONLAYER_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEDepthConcatenate.h b/arm_compute/runtime/NEON/functions/NEDepthConcatenate.h new file mode 100644 index 0000000000..02ff1227c7 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEDepthConcatenate.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEDEPTHCONCATENATE_H__ +#define __ARM_COMPUTE_NEDEPTHCONCATENATE_H__ + +#include "arm_compute/runtime/IFunction.h" + +#include +#include + +namespace arm_compute +{ +class ITensor; +class NEDepthConcatenateKernel; +class NEFillBorderKernel; + +/** Basic function to execute concatenate tensors along z axis. This function calls the following kernels: + * + * -# @ref NEFillBorderKernel (executed if input's lowest two dimensions are smaller than respective output's dimensions) + * -# @ref NEDepthConcatenateKernel + * + */ +class NEDepthConcatenate : public IFunction +{ +public: + /** Default constructor */ + NEDepthConcatenate(); + /** Initialise the kernel's inputs vector and output. + * + * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: F32. 
+ * @param[out] output Output tensor. Data types supported: F32. + */ + void configure(std::vector<ITensor *> inputs_vector, ITensor *output); + + // Inherited methods overridden: + void run() override; + +private: + std::vector<ITensor *> _inputs_vector; + std::unique_ptr<NEDepthConcatenateKernel[]> _concat_kernels_vector; + std::unique_ptr<NEFillBorderKernel[]> _border_handlers_vector; + unsigned int _num_inputs; +}; +} +#endif /* __ARM_COMPUTE_NEDEPTHCONCATENATE_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEDepthConvert.h b/arm_compute/runtime/NEON/functions/NEDepthConvert.h new file mode 100644 index 0000000000..7c59ce432d --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEDepthConvert.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEDEPTHCONVERT_H__ +#define __ARM_COMPUTE_NEDEPTHCONVERT_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +#include <cstdint> + +namespace arm_compute +{ +class ITensor; + +/**Basic function to run @ref NEDepthConvertKernel */ +class NEDepthConvert : public INESimpleFunction +{ +public: + /** Constructor */ + NEDepthConvert() = default; + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + NEDepthConvert(const NEDepthConvert &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + const NEDepthConvert &operator=(const NEDepthConvert &) = delete; + /** Initialize the function's source, destination + * + * Input format must be different than output format. + * + * Valid conversions Input -> Output : + * QS8 -> F32 + * U8 -> U16, S16, S32 + * U16 -> U8, U32 + * S16 -> U8, S32 + * F32 -> QS8 + * + * + * @param[in] input The input tensor to convert. Data type supported: QS8/U8/U16/S16/F32. + * @param[out] output The output tensor. Data type supported: QS8/U8/U16/S16/U32/S32/F32. + * @param[in] policy Conversion policy. + * @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8. + * It is not used on fixed point conversion. + */ + void configure(const ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift); +}; +} +#endif /*__ARM_COMPUTE_NEDEPTHCONVERT_H__*/ diff --git a/arm_compute/runtime/NEON/functions/NEDerivative.h b/arm_compute/runtime/NEON/functions/NEDerivative.h new file mode 100644 index 0000000000..57b7409b39 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEDerivative.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEDERIVATIVE_H__ +#define __ARM_COMPUTE_NEDERIVATIVE_H__ + +#include "arm_compute/core/NEON/kernels/NEDerivativeKernel.h" +#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" + +#include + +namespace arm_compute +{ +class ITensor; + +/** Basic function to execute first order derivative operator. This function calls the following NEON kernels: + * + * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref NEDerivativeKernel + * + */ +class NEDerivative : public IFunction +{ +public: + /** Default constructor */ + NEDerivative(); + /** Initialise the function's source, destinations and border mode. + * + * @note At least one of output_x or output_y must be not NULL. 
+ * + * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output_x (optional) Destination tensor. Derivative along the X direction. Data type supported: S16. + * @param[out] output_y (optional) Destination tensor. Derivative along the Y direction. Data type supported: S16. + * @param[in] border_mode Border mode to use + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + * + */ + void configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0); + + // Inherited methods overridden: + void run() override; + +private: + NEDerivativeKernel _kernel; /**< Derivative kernel */ + NEFillBorderKernel _border_handler; /**< Kernel to handle tensor borders */ +}; +} +#endif /* __ARM_COMPUTE_NEDERIVATIVE_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEDilate.h b/arm_compute/runtime/NEON/functions/NEDilate.h new file mode 100644 index 0000000000..17bdb3363e --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEDilate.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEDILATE_H__ +#define __ARM_COMPUTE_NEDILATE_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +#include + +namespace arm_compute +{ +class ITensor; + +/** Basic function to execute dilate. This function calls the following NEON kernels: + * + * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref NEDilateKernel + * + */ +class NEDilate : public INESimpleFunction +{ +public: + /** Initialise the kernel's inputs, output and border mode. + * + * @param[in, out] input First tensor input. Data type supported: U8.(Written to only for @p border_mode != UNDEFINED) + * @param[out] output Output tensor. Data type supported: U8. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value); +}; +} +#endif /*__ARM_COMPUTE_NEDILATE_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h new file mode 100644 index 0000000000..a356cac7c8 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYER_H__ +#define __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYER_H__ + +#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h" +#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h" +#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/Tensor.h" + +namespace arm_compute +{ +/** Function to run the direct convolution. 
+ * + * This function calls the following NEON kernels: + * + * -# @ref NEFillBorderKernel for the input + * -# @ref NEDirectConvolutionLayerBiasAccumulateKernel + * -# @ref NEDirectConvolutionLayerKernel + */ +class NEDirectConvolutionLayer : public IFunction +{ +public: + /** Constructor */ + NEDirectConvolutionLayer(); + /** Set the input, weights, biases and output tensors. + * + * @param[in, out] input Input tensor. Data types supported: QS8/F32. + * @param[in] weights Set of kernels to convolve the input volume. + * The 3rd dimension must be the same as the input's volume 3rd dimension. + * Data type supported: Same as @p input. + * @param[in] bias Set of biases. Data type supported: Same as @p input. + * @param[out] output Output tensor. + * The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: Same as @p input. + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + */ + void configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output, const PadStrideInfo &conv_info); + + // Inherited methods overridden: + void run() override; + +private: + NEDirectConvolutionLayerBiasAccumulateKernel _accumulate_bias_kernel; + NEDirectConvolutionLayerKernel _conv_kernel; + NEFillBorderKernel _input_border_handler; + Tensor _accumulator; +}; +} +#endif /* __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYER_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h b/arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h new file mode 100644 index 0000000000..6cf8008480 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEEQUALIZEHISTOGRAM_H__ +#define __ARM_COMPUTE_NEEQUALIZEHISTOGRAM_H__ + +#include "arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h" +#include "arm_compute/core/NEON/kernels/NEHistogramKernel.h" +#include "arm_compute/core/NEON/kernels/NETableLookupKernel.h" +#include "arm_compute/runtime/Distribution1D.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/Lut.h" + +#include + +namespace arm_compute +{ +class ITensor; +using IImage = ITensor; + +/** Basic function to execute histogram equalization. This function calls the following NEON kernels: + * + * -# @ref NEHistogramKernel + * -# @ref NECumulativeDistributionKernel + * -# @ref NETableLookupKernel + * + */ +class NEEqualizeHistogram : public IFunction +{ +public: + /** Default Constructor. 
*/ + NEEqualizeHistogram(); + /** Initialise the kernel's inputs. + * + * @note Currently the width of the input image must be a multiple of 16. + * + * @param[in] input Input image. Data type supported: U8. + * @param[out] output Output image. Data type supported: same as @p input + */ + void configure(const IImage *input, IImage *output); + + // Inherited methods overridden: + void run() override; + +private: + NEHistogramKernel _histogram_kernel; /**< Kernel that calculates the histogram of input. */ + NECumulativeDistributionKernel _cd_histogram_kernel; /**< Kernel that calculates the cumulative distribution + and creates the relevant LookupTable. */ + NETableLookupKernel _map_histogram_kernel; /**< Kernel that maps the input to output using the lut. */ + Distribution1D _hist; /**< Distribution that holds the histogram of the input image. */ + Distribution1D _cum_dist; /**< Distribution that holds the cumulative distribution of the input histogram. */ + Lut _cd_lut; /**< Holds the equalization lookup table. */ + static constexpr uint32_t nr_bins{ 256 }; /**< Histogram bins of the internal histograms. */ + static constexpr uint32_t max_range{ nr_bins - 1 }; /**< Histogram range of the internal histograms. */ +}; +} +#endif /*__ARM_COMPUTE_NEEQUALIZEHISTOGRAM_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEErode.h b/arm_compute/runtime/NEON/functions/NEErode.h new file mode 100644 index 0000000000..940ae18471 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEErode.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEERODE_H__ +#define __ARM_COMPUTE_NEERODE_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +#include + +namespace arm_compute +{ +class ITensor; + +/** Basic function to execute erode. This function calls the following NEON kernels: + * + * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref NEErodeKernel + * + */ +class NEErode : public INESimpleFunction +{ +public: + /** Initialise the kernel's inputs, output and border mode + * + * @param[in, out] input First tensor input. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Output tensor. Data type supported: U8. + * @param[in] border_mode Border mode to use for the convolution. 
+ * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value); +}; +} +#endif /*__ARM_COMPUTE_NEERODE_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEFastCorners.h b/arm_compute/runtime/NEON/functions/NEFastCorners.h new file mode 100644 index 0000000000..d7c31750c5 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEFastCorners.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+#ifndef __ARM_COMPUTE_NEFASTCORNERS_H__
+#define __ARM_COMPUTE_NEFASTCORNERS_H__
+
+#include "arm_compute/core/NEON/kernels/NEFastCornersKernel.h"
+#include "arm_compute/core/NEON/kernels/NEFillArrayKernel.h"
+#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
+#include "arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/Array.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/Tensor.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ITensor;
+using IImage = ITensor;
+
+/** Basic function to execute fast corners. This function call the following NEON kernels:
+ *
+ * -# @ref NEFastCornersKernel
+ * -# @ref NENonMaximaSuppression3x3Kernel (executed if nonmax_suppression == true)
+ * -# @ref NEFillArrayKernel
+ *
+ */
+class NEFastCorners : public IFunction
+{
+public:
+    /** Constructor */
+    NEFastCorners();
+    /** Initialize the function's source, destination, conv and border_mode.
+     *
+     * @param[in, out] input                 Source image. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[in]      threshold             Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3.
+     * @param[in]      nonmax_suppression    If true, non-maximum suppression is applied to detected corners before being placed in the array.
+     * @param[out]     corners               Array of keypoints to store the results.
+     * @param[in]      border_mode           Strategy to use for borders.
+     * @param[in]      constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+ */ + void configure(IImage *input, float threshold, bool nonmax_suppression, KeyPointArray *corners, + BorderMode border_mode, uint8_t constant_border_value = 0); + + // Inherited methods overridden: + void run() override; + +private: + NEFastCornersKernel _fast_corners_kernel; + NEFillBorderKernel _border_handler; + NENonMaximaSuppression3x3Kernel _nonmax_kernel; + NEFillArrayKernel _fill_kernel; + Image _output; + Image _suppressed; + bool _non_max; +}; +} +#endif /*__ARM_COMPUTE_NEFASTCORNERS_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEFillBorder.h b/arm_compute/runtime/NEON/functions/NEFillBorder.h new file mode 100644 index 0000000000..b6b7e77471 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEFillBorder.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEFILLBORDER_H__ +#define __ARM_COMPUTE_NEFILLBORDER_H__ + +#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run @ref NEFillBorderKernel */ +class NEFillBorder : public IFunction +{ +public: + /** Initialize the function's source, destination and border_mode. + * + * @note This function fills the borders within the XY-planes. + * + * @param[in, out] input Source tensor. Data type supported: U8/QS8/S16/S32/F32 + * @param[in] border_width Width of the tensor border in pixels. + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ITensor *input, unsigned int border_width, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue()); + + // Inherited methods overridden: + void run() override; + +private: + NEFillBorderKernel _border_handler; /**< Kernel to handle image borders */ +}; +} +#endif /*__ARM_COMPUTE_NEFILLBORDER_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h new file mode 100644 index 0000000000..33ec4ef721 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEFULLYCONNECTEDLAYER_H__ +#define __ARM_COMPUTE_NEFULLYCONNECTEDLAYER_H__ + +#include "arm_compute/runtime/IFunction.h" + +#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" +#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h" +#include "arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" +#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" +#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h" +#include "arm_compute/core/NEON/kernels/NETransposeKernel.h" +#include "arm_compute/runtime/Tensor.h" + +namespace arm_compute +{ +/** Basic function to reshape the weights of Fully Connected layer with NEON. 
This function calls the following kernels: + * + * -# @ref NETransposeKernel (if @p transpose_weights is set to true) + * -# @ref NEGEMMTranspose1xWKernel (if @p is_batched_fc_layer is set to true) + * + * @note The fully connected layer accepts "weights" tensors only with 2 dimensions. + */ +class NEFullyConnectedLayerReshapeWeights : public IFunction +{ +public: + /** Constructor */ + NEFullyConnectedLayerReshapeWeights(); + /** Set the input and output tensors. + * + * @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported: QS8/F32. + * @param[out] output Destination tensor. Data type supported: Same as @p input. + * @param[in] transpose_weights True if the weights must be transposed. Data types supported: Same as @p weights. + * @param[in] is_batched_fc_layer True if it is a batched fully connected layer + */ + void configure(const ITensor *input, ITensor *output, bool transpose_weights, bool is_batched_fc_layer); + + // Inherited methods overridden: + void run() override; + +private: + NETransposeKernel _transpose_kernel; + NEGEMMTranspose1xWKernel _transpose1xW_kernel; + Tensor _transpose_output; + bool _transpose_weights; + bool _is_batched_fc_layer; +}; + +/** Basic function to compute a Fully Connected layer on NEON. This function calls the following NEON kernels: + * -# @ref NEIm2ColKernel (called when the input comes from a convolutional layer) + * -# @ref NEFullyConnectedLayerReshapeWeights (if @p are_weights_reshaped flag is set to false) (called once) + * -# @ref NEGEMMInterleave4x4Kernel (called if we have a multi-batch input) + * -# @ref NEGEMMMatrixMultiplyKernel + * -# @ref NEGEMMMatrixAccumulateBiasesKernel (if @p biases is not equal to nullptr) + * + * @note The fully connected layer accepts "weights" tensors only with 2 dimensions. + */ +class NEFullyConnectedLayer : public IFunction +{ +public: + /** Constructor */ + NEFullyConnectedLayer(); + /** Set the input and output tensors. 
+ * + * @param[in] input Source tensor. Data type supported: QS8/F32. + * @param[in] weights Weights tensor. The weights must be 2 dimensional. Data type supported: Same as @p input. + * @param[in] biases Bias tensor. Can be nullptr. Data type supported:Same as @p input. + * @param[out] output Destination tensor. Data type supported: Same as @p input. + * @param[in] transpose_weights (Optional) Transpose the weights tensor if true. Defaults to true. + * @param[in] are_weights_reshaped (Optional) Reshape the weights tensor if false. Defaults to false. + */ + void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, bool transpose_weights = true, bool are_weights_reshaped = false); + + //Inherited methods override + void run() override; + +private: + void configure_fc_fc_wb(const ITensor *input, const ITensor *weights, ITensor *output); + void configure_fc_fc_nb(const ITensor *input, const ITensor *weights, ITensor *output); + void configure_conv_fc_wb(const ITensor *input, const ITensor *weights, ITensor *output); + void configure_conv_fc_nb(const ITensor *input, const ITensor *weights, ITensor *output); + + NEIm2ColKernel _im2col_kernel; + NEFullyConnectedLayerReshapeWeights _reshape_weights_kernel; + NEGEMMInterleave4x4Kernel _interleave4x4_kernel; + NEGEMMMatrixMultiplyKernel _mm_kernel; + NEGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel; + Tensor _im2col_output; + Tensor _interleave4x4_output; + Tensor _reshape_weights_output; + bool _are_weights_reshaped; + bool _is_fc_after_conv; + bool _is_batched_fc_layer; + bool _accumulate_biases; +}; +} +#endif /* __ARM_COMPUTE_NEFULLYCONNECTEDLAYER_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEGEMM.h b/arm_compute/runtime/NEON/functions/NEGEMM.h new file mode 100644 index 0000000000..a40aa910a5 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEGEMM.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEGEMM_H__ +#define __ARM_COMPUTE_NEGEMM_H__ + +#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" +#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h" +#include "arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" +#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/Tensor.h" + +namespace arm_compute +{ +/** Basic function to execute GEMM on NEON. 
This function calls the following NEON kernels: + * + * -# @ref NEGEMMInterleave4x4Kernel (if the output tensor is a matrix) + * -# @ref NEGEMMTranspose1xWKernel (if the output tensor is a matrix) + * -# @ref NEGEMMMatrixMultiplyKernel + * -# @ref NEGEMMMatrixAdditionKernel (if c != nullptr and beta != 0.0) + * + */ +class NEGEMM : public IFunction +{ +public: + /** Constructor */ + NEGEMM(); + /** Initialise the kernel's inputs, output + * + * @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C]. + * @note GEMM: The tensors a, b, c, d must have the same data type. You should not mix data types when calling this function. + * + * @param[in] a First input tensor (Matrix A or Vector A). Data type supported: QS8/F16/F32 + * @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a + * @param[in] c Third input tensor (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a + * @param[out] d Output tensor. Data type supported: same as @p a + * @param[in] alpha Weight of the matrix product + * @param[in] beta Weight of matrix C + */ + void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *d, float alpha, float beta); + + // Inherited methods overridden: + void run() override; + +private: + NEGEMMInterleave4x4Kernel _interleave_kernel; + NEGEMMTranspose1xWKernel _transpose_kernel; + NEGEMMMatrixMultiplyKernel _mm_kernel; + NEGEMMMatrixAdditionKernel _ma_kernel; + Tensor _tmp_a; + Tensor _tmp_b; + bool _run_vector_matrix_multiplication; + bool _run_addition; +}; +} +#endif /*__ARM_COMPUTE_NEGEMM_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h b/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h new file mode 100644 index 0000000000..b911fd064f --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEGEMMINTERLEAVE4X4_H__ +#define __ARM_COMPUTE_NEGEMMINTERLEAVE4X4_H__ + +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to execute NEGEMMInterleave4x4Kernel. This function calls the following NEON kernel: + * + * -# @ref NEGEMMInterleave4x4Kernel + * + */ +class NEGEMMInterleave4x4 : public INESimpleFunction +{ +public: + /** Initialise the kernel's inputs, output + * + * @param[in] input First input tensor. Data types supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32 + * @param[out] output Output tensor. 
Data type supported: same as @p input + */ + void configure(const ITensor *input, ITensor *output); +}; +} +#endif /*__ARM_COMPUTE_NEGEMMINTERLEAVE4X4_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowp.h b/arm_compute/runtime/NEON/functions/NEGEMMLowp.h new file mode 100644 index 0000000000..bfb1a494b8 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEGEMMLowp.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEGEMMLOWP_H__ +#define __ARM_COMPUTE_NEGEMMLOWP_H__ + +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/Tensor.h" + +#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" +#include "arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h" +#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to execute GEMMLowp on NEON. This function calls the following NEON kernels: +* +* -# @ref NEGEMMInterleave4x4Kernel +* -# @ref NEGEMMTranspose1xWKernel +* -# @ref NEGEMMLowpMatrixMultiplyKernel +* +*/ +class NEGEMMLowp : public IFunction +{ +public: + /** Constructor */ + NEGEMMLowp(); + /** Initialise the kernel's inputs, output + * + * @note GEMM_LOWP: low precision GEMM kernel + * This kernel performs the following computation: + * + * -# Convert a values from uint8 to int32 and add a_offset to each of them. + * -# Convert b values from uint8 to int32 and add b_offset to each of them. + * -# Compute the int32 matrix product of the resulting a * b. + * -# Add output_offset to each entry of the result. + * -# Multiply each entry of the result and round to the nearest integer + * -# Clamp the resulting int32 values to the [0..255] range and cast to uint8. + * + * @param[in] a First input tensor (Matrix A). Data type supported: U8. + * @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a + * @param[out] output Output tensor. Data type supported: same as @p a. + * @param[in] a_offset Offset to be added to each element of the matrix A. + * @param[in] b_offset Offset to be added to each element of the matrix B. 
+ * @param[in] output_offset Offset to be added to each element of the output matrix + * @param[in] output_mult_int Value to be multiplied to each element of the output matrix + * @param[in] shift Number of bits to shift right the result. + */ + void configure(const ITensor *a, const ITensor *b, ITensor *output, int32_t a_offset, int32_t b_offset, int32_t output_offset, int32_t output_mult_int, int32_t shift); + // Inherited methods overridden: + void run() override; + +private: + NEGEMMInterleave4x4Kernel _interleave_kernel; + NEGEMMTranspose1xWKernel _transpose_kernel; + NEGEMMLowpMatrixMultiplyKernel _mm_kernel; + Tensor _tmp_a; + Tensor _tmp_b; +}; +} +#endif /*__ARM_COMPUTE_NEGEMMLOWP_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h b/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h new file mode 100644 index 0000000000..447b8c9c70 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEGEMMTRANSPOSE1XW_H__ +#define __ARM_COMPUTE_NEGEMMTRANSPOSE1XW_H__ + +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +namespace arm_compute +{ +/** Basic function to execute NEGEMMTranspose1xWKernel. This function calls the following NEON kernels: + * + * -# @ref NEGEMMTranspose1xWKernel + * + */ +class NEGEMMTranspose1xW : public INESimpleFunction +{ +public: + /** Initialise the kernel's inputs, output + * + * @param[in] input First input tensor. Data type supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32/ + * @param[out] output Output tensor. Data type supported: same as @p input + */ + void configure(const ITensor *input, ITensor *output); +}; +} +#endif /*__ARM_COMPUTE_NEGEMMTRANSPOSE1XW_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEGaussian3x3.h b/arm_compute/runtime/NEON/functions/NEGaussian3x3.h new file mode 100644 index 0000000000..a237e6f0e5 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEGaussian3x3.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEGAUSSIAN3x3_H__
+#define __ARM_COMPUTE_NEGAUSSIAN3x3_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to execute gaussian filter 3x3. This function calls the following NEON kernels:
+ *
+ * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
+ * -# @ref NEGaussian3x3Kernel
+ *
+ */
+class NEGaussian3x3 : public INESimpleFunction
+{
+public:
+    /** Initialise the function's input, output and border mode.
+     *
+     * @param[in, out] input                 Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]     output                Destination tensor, Data type supported: U8.
+     * @param[in]      border_mode           Strategy to use for borders.
+     * @param[in]      constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
+};
+}
+#endif /*__ARM_COMPUTE_NEGAUSSIAN3x3_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEGaussian5x5.h b/arm_compute/runtime/NEON/functions/NEGaussian5x5.h
new file mode 100644
index 0000000000..699e42efb4
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEGaussian5x5.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEGAUSSIAN5x5_H__
+#define __ARM_COMPUTE_NEGAUSSIAN5x5_H__
+
+#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
+#include "arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/Tensor.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to execute gaussian filter 5x5. This function calls the following NEON kernels:
+ *
+ * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
+ * -# @ref NEGaussian5x5HorKernel
+ * -# @ref NEGaussian5x5VertKernel
+ *
+ */
+class NEGaussian5x5 : public IFunction
+{
+public:
+    /** Default constructor
+     */
+    NEGaussian5x5();
+    /** Initialise the function's input, output and border mode.
+ * + * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor, Data type supported: U8. + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value = 0); + + // Inherited methods overridden: + void run() override; + +protected: + NEGaussian5x5HorKernel _kernel_hor; /**< kernel for horizontal pass */ + NEGaussian5x5VertKernel _kernel_vert; /**< kernel for vertical pass */ + Tensor _tmp; /**< temporary buffer for output of horizontal pass */ + NEFillBorderKernel _border_handler; /**< kernel to handle tensor borders */ +}; +} +#endif /*__ARM_COMPUTE_NEGAUSSIAN5x5_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEGaussianPyramid.h b/arm_compute/runtime/NEON/functions/NEGaussianPyramid.h new file mode 100644 index 0000000000..5f0a67ea05 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEGaussianPyramid.h @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEGAUSSIANPYRAMID_H__
+#define __ARM_COMPUTE_NEGAUSSIANPYRAMID_H__
+
+#include "arm_compute/core/IPyramid.h"
+#include "arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h"
+#include "arm_compute/core/NEON/kernels/NEScaleKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h"
+#include "arm_compute/runtime/Pyramid.h"
+#include "arm_compute/runtime/Tensor.h"
+
+#include <cstdint>
+#include <memory>
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Common interface for all Gaussian pyramid functions */
+class NEGaussianPyramid : public IFunction
+{
+public:
+    /** Default constructor */
+    NEGaussianPyramid();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEGaussianPyramid(const NEGaussianPyramid &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEGaussianPyramid &operator=(const NEGaussianPyramid &) = delete;
+    /** Allow instances of this class to be moved */
+    NEGaussianPyramid(NEGaussianPyramid &&) = default;
+    /** Allow instances of this class to be moved */
+    NEGaussianPyramid &operator=(NEGaussianPyramid &&) = default;
+    /** Default destructor */
+    virtual ~NEGaussianPyramid() = default;
+
+    /** Initialise the function's source, destinations and border mode.
+     *
+     * @param[in]  input                 Source tensor. Data type supported: U8.
+     * @param[out] pyramid               Destination pyramid tensors, Data type supported at each level: U8.
+     * @param[in]  border_mode           Border mode to use.
+     * @param[in]  constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     *
+     */
+    virtual void configure(const ITensor *input, IPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) = 0;
+
+protected:
+    const ITensor *_input;
+    IPyramid      *_pyramid;
+    Pyramid        _tmp;
+};
+
+/** Basic function to execute gaussian pyramid with HALF scale factor. This function calls the following NEON kernels:
+ *
+ * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
+ * -# @ref NEGaussianPyramidHorKernel
+ * -# @ref NEGaussianPyramidVertKernel
+ *
+ */
+class NEGaussianPyramidHalf : public NEGaussianPyramid
+{
+public:
+    /** Constructor */
+    NEGaussianPyramidHalf();
+
+    // Inherited methods overridden:
+    void configure(const ITensor *input, IPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override;
+    void run() override;
+
+private:
+    std::unique_ptr<NEFillBorderKernel[]>          _border_handler;
+    std::unique_ptr<NEGaussianPyramidHorKernel[]>  _horizontal_reduction;
+    std::unique_ptr<NEGaussianPyramidVertKernel[]> _vertical_reduction;
+};
+
+/** Basic function to execute gaussian pyramid with ORB scale factor.
This function calls the following NEON kernels and functions: + * + * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref NEGaussian5x5 + * -# @ref NEScaleKernel + * + */ +class NEGaussianPyramidOrb : public NEGaussianPyramid +{ +public: + /** Constructor */ + NEGaussianPyramidOrb(); + + // Inherited methods overridden: + void configure(const ITensor *input, IPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override; + void run() override; + +private: + std::unique_ptr _offsets; + std::unique_ptr _gaus5x5; + std::unique_ptr _scale_nearest; +}; +} +#endif /*__ARM_COMPUTE_NEGAUSSIANPYRAMID_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEHOGDescriptor.h b/arm_compute/runtime/NEON/functions/NEHOGDescriptor.h new file mode 100644 index 0000000000..b7b4909060 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEHOGDescriptor.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEHOGDESCRIPTOR_H__ +#define __ARM_COMPUTE_NEHOGDESCRIPTOR_H__ + +#include "arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/NEON/functions/NEHOGGradient.h" +#include "arm_compute/runtime/Tensor.h" + +namespace arm_compute +{ +class IHOG; +/** Basic function to calculate HOG descriptor. This function calls the following NEON kernels: + * + * -# @ref NEHOGGradient + * -# @ref NEHOGOrientationBinningKernel + * -# @ref NEHOGBlockNormalizationKernel + * + */ +class NEHOGDescriptor : public IFunction +{ +public: + /** Default constructor */ + NEHOGDescriptor(); + /** Initialise the function's source, destination, HOG data-object and border mode + * + * @param[in, out] input Input tensor. Data type supported: U8 + * (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Output tensor which stores the HOG descriptor. DataType supported: F32. The number of channels is equal to the number of histogram bins per block + * @param[in] hog HOG data object which describes the HOG descriptor + * @param[in] border_mode Border mode to use. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
+ */ + void configure(ITensor *input, ITensor *output, const IHOG *hog, BorderMode border_mode, uint8_t constant_border_value = 0); + + // Inherited method overridden: + void run() override; + +private: + NEHOGGradient _gradient; + NEHOGOrientationBinningKernel _orient_bin; + NEHOGBlockNormalizationKernel _block_norm; + Tensor _mag; + Tensor _phase; + Tensor _hog_space; +}; +} + +#endif /* __ARM_COMPUTE_NEHOGDESCRIPTOR_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEHOGDetector.h b/arm_compute/runtime/NEON/functions/NEHOGDetector.h new file mode 100644 index 0000000000..98b8a89bc1 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEHOGDetector.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEHOGDETECTOR_H__ +#define __ARM_COMPUTE_NEHOGDETECTOR_H__ + +#include "arm_compute/core/IHOG.h" +#include "arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h" +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +namespace arm_compute +{ +/** Basic function to execute HOG detector based on linear SVM. This function calls the following NEON kernel: + * + * -# @ref NEHOGDetectorKernel + * + */ +class NEHOGDetector : public INESimpleFunction +{ +public: + /** Initialise the kernel's input, output, HOG data object, detection window stride, threshold and index class + * + * @attention The function does not reset the number of values in @ref IDetectionWindowArray so it is caller's responsibility to clear it. + * + * @param[in] input Input tensor. It is the output of @ref NEHOGDescriptor. Data type supported: F32 + * @param[in] hog HOG data-object that describes the HOG descriptor + * @param[out] detection_windows Array of @ref DetectionWindow used to store the detected objects + * @param[in] detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions. + * It must be multiple of the block stride stored in hog + * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane + * @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to + */ + void configure(const ITensor *input, const IHOG *hog, IDetectionWindowArray *detection_windows, const Size2D &detection_window_stride, float threshold = 0.0f, size_t idx_class = 0); +}; +} + +#endif /* __ARM_COMPUTE_NEHOGDETECTOR_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEHOGGradient.h b/arm_compute/runtime/NEON/functions/NEHOGGradient.h new file mode 100644 index 0000000000..dd2d99adfe --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEHOGGradient.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEHOGGRADIENT_H__ +#define __ARM_COMPUTE_NEHOGGRADIENT_H__ + +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/NEON/functions/NEDerivative.h" +#include "arm_compute/runtime/Tensor.h" + +#include +#include + +namespace arm_compute +{ +class ITensor; +/** Basic function to calculate the gradient for HOG. This function calls the following NEON kernels: + * + * -# @ref NEDerivative + * -# NEMagnitudePhaseKernel + * + */ +class NEHOGGradient : public IFunction +{ +public: + /** Default constructor */ + NEHOGGradient(); + /** Initialise the function's source, destinations, phase type and border mode + * + * @param[in, out] input Input tensor. Data type supported: U8. 
+ * (Written to only for @p border_mode != UNDEFINED) + * @param[out] output_magnitude Output tensor (magnitude). Data type supported: U16. + * @param[out] output_phase Output tensor (phase). Data type supported: U8 + * @param[in] phase_type Type of @ref PhaseType + * @param[in] border_mode Border mode to use + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ITensor *input, ITensor *output_magnitude, ITensor *output_phase, PhaseType phase_type, BorderMode border_mode, uint8_t constant_border_value = 0); + + // Inherited method overridden: + void run() override; + +private: + NEDerivative _derivative; + std::unique_ptr _mag_phase; + Tensor _gx; + Tensor _gy; +}; +} +#endif /*__ARM_COMPUTE_NEHOGGRADIENT_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h b/arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h new file mode 100644 index 0000000000..2d07e6435f --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEHOGMULTIDETECTION_H__ +#define __ARM_COMPUTE_NEHOGMULTIDETECTION_H__ + +#include "arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h" +#include "arm_compute/core/IArray.h" +#include "arm_compute/core/IMultiHOG.h" +#include "arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/NEON/functions/NEHOGDetector.h" +#include "arm_compute/runtime/NEON/functions/NEHOGGradient.h" +#include "arm_compute/runtime/Tensor.h" + +namespace arm_compute +{ +/** Basic function to detect multiple objects (or the same object at different scales) on the same input image using HOG. This function calls the following NEON kernels: + * + * -# @ref NEHOGGradient + * -# @ref NEHOGOrientationBinningKernel + * -# @ref NEHOGBlockNormalizationKernel + * -# @ref NEHOGDetector + * -# @ref CPPDetectionWindowNonMaximaSuppressionKernel (executed if non_maxima_suppression == true) + * + * @note This implementation works if all the HOG data-objects within the IMultiHOG container have the same: + * -# Phase type + -# Normalization type + -# L2 hysteresis threshold if the normalization type is L2HYS_NORM + * + */ +class NEHOGMultiDetection : public IFunction +{ +public: + /** Default constructor */ + NEHOGMultiDetection(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHOGMultiDetection(const NEHOGMultiDetection &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHOGMultiDetection &operator=(const NEHOGMultiDetection &) = delete; + /** Initialise the function's source, destination, detection window 
strides, border mode, threshold and non-maxima suppression + * + * @param[in, out] input Input tensor. Data type supported: U8 + * (Written to only for @p border_mode != UNDEFINED) + * @param[in] multi_hog Container of multiple HOG data object. Each HOG data object describes one HOG model to detect. + * This container should store the HOG data-objects in descending or ascending cell_size width order. + * This will help to understand if the HOG descriptor computation can be skipped for some HOG data-objects + * @param[out] detection_windows Array of @ref DetectionWindow used for locating the detected objects + * @param[in] detection_window_strides Array of @ref Size2D used to specify the distance in pixels between 2 consecutive detection windows in x and y directions for each HOG data-object + * The dimension of this array must be the same of multi_hog->num_models() + * The i-th detection_window_stride of this array must be multiple of the block_stride stored in the i-th multi_hog array + * @param[in] border_mode Border mode to use. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane + * @param[in] non_maxima_suppression (Optional) Flag to specify whether the non-maxima suppression is required or not. 
+ * True if the non-maxima suppression stage has to be computed + * @param[in] min_distance (Optional) Radial Euclidean distance to use for the non-maxima suppression stage + * + */ + void configure(ITensor *input, const IMultiHOG *multi_hog, IDetectionWindowArray *detection_windows, const ISize2DArray *detection_window_strides, BorderMode border_mode, + uint8_t constant_border_value = 0, + float threshold = 0.0f, bool non_maxima_suppression = false, float min_distance = 1.0f); + + // Inherited method overridden: + void run() override; + +private: + NEHOGGradient _gradient_kernel; + std::unique_ptr _orient_bin_kernel; + std::unique_ptr _block_norm_kernel; + std::unique_ptr _hog_detect_kernel; + std::unique_ptr _non_maxima_kernel; + std::unique_ptr _hog_space; + std::unique_ptr _hog_norm_space; + IDetectionWindowArray *_detection_windows; + Tensor _mag; + Tensor _phase; + bool _non_maxima_suppression; + size_t _num_orient_bin_kernel; + size_t _num_block_norm_kernel; + size_t _num_hog_detect_kernel; +}; +} + +#endif /* __ARM_COMPUTE_NEHOGMULTIDETECTION_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEHarrisCorners.h b/arm_compute/runtime/NEON/functions/NEHarrisCorners.h new file mode 100644 index 0000000000..a709871153 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEHarrisCorners.h @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEHARRISCORNERS_H__ +#define __ARM_COMPUTE_NEHARRISCORNERS_H__ + +#include "arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h" +#include "arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h" +#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/Array.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h" +#include "arm_compute/runtime/Tensor.h" + +#include +#include + +namespace arm_compute +{ +class ITensor; +using IImage = ITensor; + +/** Basic function to execute harris corners detection. This function calls the following NEON kernels and functions: + * + * -# @ref NESobel3x3 (if gradient_size == 3) or
+ * @ref NESobel5x5 (if gradient_size == 5) or
+ * @ref NESobel7x7 (if gradient_size == 7) + * -# @ref NEFillBorderKernel + * -# NEHarrisScoreKernel<3> (if block_size == 3) or
+ * NEHarrisScoreKernel<5> (if block_size == 5) or
+ * NEHarrisScoreKernel<7> (if block_size == 7) + * -# @ref NENonMaximaSuppression3x3 + * -# @ref CPPCornerCandidatesKernel + * -# @ref CPPSortEuclideanDistanceKernel + * + */ +class NEHarrisCorners : public IFunction +{ +public: + /** Constructor + * + * Initialize _sobel, _harris_score and _corner_list to nullptr. + */ + NEHarrisCorners(); + /** Initialize the function's source, destination, conv and border_mode. + * + * @param[in, out] input Source image. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[in] threshold Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel). + * @param[in] min_dist Radial Euclidean distance for the Euclidean distance stage + * @param[in] sensitivity Sensitivity threshold k from the Harris-Stephens equation + * @param[in] gradient_size The gradient window size to use on the input. The implementation supports 3, 5, and 7 + * @param[in] block_size The block window size used to compute the Harris Corner score. The implementation supports 3, 5, and 7. + * @param[out] corners Array of keypoints to store the results. + * @param[in] border_mode Border mode to use + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + * @param[in] use_fp16 (Optional) If true the FP16 kernels will be used. If false F32 kernels are used. 
+ */ + void configure(IImage *input, float threshold, float min_dist, float sensitivity, + int32_t gradient_size, int32_t block_size, KeyPointArray *corners, + BorderMode border_mode, uint8_t constant_border_value = 0, bool use_fp16 = false); + + // Inherited methods overridden: + void run() override; + +private: + std::unique_ptr _sobel; /**< Sobel function */ + std::unique_ptr _harris_score; /**< Harris score kernel */ + NENonMaximaSuppression3x3 _non_max_suppr; /**< Non-maxima suppression function */ + CPPCornerCandidatesKernel _candidates; /**< Sort kernel */ + CPPSortEuclideanDistanceKernel _sort_euclidean; /**< Euclidean distance kernel */ + NEFillBorderKernel _border_gx; /**< Border handler before running harris score */ + NEFillBorderKernel _border_gy; /**< Border handler before running harris score */ + Image _gx; /**< Source image - Gx component */ + Image _gy; /**< Source image - Gy component */ + Image _score; /**< Source image - Harris score */ + Image _nonmax; /**< Source image - Non-Maxima suppressed image */ + std::unique_ptr _corners_list; /**< Array of InternalKeypoint. It stores the potential corner candidates */ + int32_t _num_corner_candidates; /**< Number of potential corner candidates */ +}; +} +#endif /*__ARM_COMPUTE_NEHARRISCORNERS_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEHistogram.h b/arm_compute/runtime/NEON/functions/NEHistogram.h new file mode 100644 index 0000000000..c24510dcb3 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEHistogram.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEHISTOGRAM_H__ +#define __ARM_COMPUTE_NEHISTOGRAM_H__ + +#include "arm_compute/core/NEON/kernels/NEHistogramKernel.h" +#include "arm_compute/runtime/IFunction.h" + +#include +#include +#include + +namespace arm_compute +{ +class IDistribution1D; + +/** Basic function to run @ref NEHistogramKernel. */ +class NEHistogram : public IFunction +{ +public: + /** Default Constructor. */ + NEHistogram(); + /** Initialise the kernel's inputs. + * + * @param[in] input Input image. Data type supported: U8. + * @param[out] output Output distribution. 
+ */ + void configure(const IImage *input, IDistribution1D *output); + + // Inherited methods overridden: + void run() override; + +private: + NEHistogramKernel _histogram_kernel; + std::unique_ptr _local_hist; + std::unique_ptr _window_lut; + size_t _local_hist_size; + /** 256 possible pixel values as we handle only U8 images */ + static constexpr unsigned int window_lut_default_size = 256; +}; +} +#endif /*__ARM_COMPUTE_NEHISTOGRAM_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEIntegralImage.h b/arm_compute/runtime/NEON/functions/NEIntegralImage.h new file mode 100644 index 0000000000..6d7dd697e8 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEIntegralImage.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEINTEGRALIMAGE_H__ +#define __ARM_COMPUTE_NEINTEGRALIMAGE_H__ + +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run a @ref NEIntegralImageKernel */ +class NEIntegralImage : public INESimpleFunction +{ +public: + /** Initialise the function's source, destinations and border mode. + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] output Destination tensor. Data type supported: U32. + */ + void configure(const ITensor *input, ITensor *output); +}; +} +#endif /*__ARM_COMPUTE_NEINTEGRALIMAGE_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NELaplacianPyramid.h b/arm_compute/runtime/NEON/functions/NELaplacianPyramid.h new file mode 100644 index 0000000000..991ae7c293 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NELaplacianPyramid.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NELAPLACIANPYRAMID_H__ +#define __ARM_COMPUTE_NELAPLACIANPYRAMID_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h" +#include "arm_compute/runtime/NEON/functions/NEDepthConvert.h" +#include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h" +#include "arm_compute/runtime/NEON/functions/NEGaussianPyramid.h" +#include "arm_compute/runtime/Pyramid.h" + +#include +#include +#include + +namespace arm_compute +{ +class ITensor; + +/** Basic function to execute laplacian pyramid. This function calls the following NEON kernels and functions: + * + * -# @ref NEGaussianPyramidHalf + * -# @ref NEGaussian5x5 + * -# @ref NEArithmeticSubtraction + * + * First a Gaussian pyramid is created. Then, for each level i, the corresponding tensor I(i) is blurred with the Gaussian 5x5 filter, and then + * difference between the two tensors is the corresponding level L(i) of the Laplacian pyramid. + * L(i) = I(i) - Gaussian5x5(I(i)) + * Level 0 has always the same first two dimensions as the input tensor. +*/ +class NELaplacianPyramid : public IFunction +{ +public: + /** Constructor */ + NELaplacianPyramid(); + /** Initialise the function's source, destinations and border mode. + * + * @param[in] input Source tensor. Data type supported: U8. + * @param[out] pyramid Destination pyramid tensors, Data type supported at each level: S16. + * @param[out] output The lowest resolution tensor necessary to reconstruct the input tensor from the pyramid. Data type supported: S16. 
+ * The first two dimensions of this tensor must match the first two dimensions of the tensor in the last level of the pyramid, that is: + * out.width = in.width() / pow(2,pyramid_levels-1) and out.height = in.height() / pow(2,pyramid_levels-1) + * @param[in] border_mode Border mode to use. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + * + */ + void configure(const ITensor *input, IPyramid *pyramid, ITensor *output, BorderMode border_mode, uint8_t constant_border_value); + + // Inherited methods overridden: + void run() override; + +private: + size_t _num_levels; + NEGaussianPyramidHalf _gaussian_pyr_function; + std::unique_ptr _convf; + std::unique_ptr _subf; + Pyramid _gauss_pyr; + Pyramid _conv_pyr; + NEDepthConvert _depth_function; +}; +} +#endif /*__ARM_COMPUTE_NELAPLACIANPYRAMID_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h b/arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h new file mode 100644 index 0000000000..4139733499 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NELAPLACIANRECONSTRUCT_H__ +#define __ARM_COMPUTE_NELAPLACIANRECONSTRUCT_H__ +
#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" +#include "arm_compute/runtime/NEON/functions/NEDepthConvert.h" +#include "arm_compute/runtime/NEON/functions/NEScale.h" +#include "arm_compute/runtime/Pyramid.h" + +#include +#include + +namespace arm_compute +{ +class ITensor; +using IImage = ITensor; + +/** Basic function to execute laplacian reconstruction. This function calls the following NEON kernels and functions: + * + * -# @ref NEArithmeticAddition + * -# @ref NEScale + * -# @ref NEDepthConvert + * + * This function reconstructs the original image from a Laplacian Image Pyramid. + * + * The input image is added to the last level of the Laplacian pyramid L(n-2), the resulting image is upsampled to the + * resolution of the next pyramid level. + * + * I(n-2) = upsample(input + L(n-1)) + * + * For each pyramid level i, except i=0 and i=n-1: + * I(i-1) = upsample(I(i) + L(i)) + * + * output = I(0) + L(0) +*/ +class NELaplacianReconstruct : public IFunction +{ +public: + /** Constructor */ + NELaplacianReconstruct(); + /** Initialise the function's source, destinations and border mode. + * + * The Output image must have the same size as the first level of the pyramid. + * The Input image must have the same size as the last level of the pyramid. 
+ * + * The idea is to reconstruct the original hi-res image from a low-res representation of it and the laplacian pyramid. + * + * @param[in] pyramid Laplacian pyramid tensors, Data type supported at each level: S16. + * @param[in] input Source tensor. Data type supported: S16. + * @param[out] output Output tensor. Data type supported: U8. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + * + */ + void configure(const IPyramid *pyramid, const ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value); + + // Inherited methods overridden: + void run() override; + +private: + Pyramid _tmp_pyr; + std::unique_ptr _addf; + std::unique_ptr _scalef; + NEDepthConvert _depthf; +}; +} +#endif /*__ARM_COMPUTE_NELAPLACIANRECONSTRUCT_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h new file mode 100644 index 0000000000..1b2b2ee3cf --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NELOCALLYCONNECTEDLAYER_H__ +#define __ARM_COMPUTE_NELOCALLYCONNECTEDLAYER_H__ + +#include "arm_compute/runtime/IFunction.h" + +#include "arm_compute/core/NEON/kernels/NECol2ImKernel.h" +#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h" +#include "arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h" +#include "arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/Tensor.h" + +namespace arm_compute +{ +class INETensor; + +/** Basic function to compute the locally connected layer. This function calls the following NEON kernels: + * + * -# @ref NEWeightsReshapeKernel (executed only once for each configuration) + * -# @ref NEIm2ColKernel + * -# @ref NELocallyConnectedMatrixMultiplyKernel + * -# @ref NECol2ImKernel + */ +class NELocallyConnectedLayer : public IFunction +{ +public: + /** Default constructor */ + NELocallyConnectedLayer(); + /** Set the input and output tensors. + * + * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. + * Data types supported: F32. + * @param[in] weights Weights tensor. Weights are 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches]. Data type supported:Same as @p input. + * @param[in] biases Biases tensor. Shared biases supported. Biases are 2D tensor with dimensions [OFM, num_patches]. 
Data type supported:Same as @p input. + * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. + * Data types supported: Same as @p input. + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + */ + void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info); + + // Inherited methods overridden: + void run() override; + +private: + NEIm2ColKernel _input_im2col_kernel; + NEWeightsReshapeKernel _weights_reshape_kernel; + NELocallyConnectedMatrixMultiplyKernel _mm_kernel; + NECol2ImKernel _output_col2im_kernel; + Tensor _input_im2col_reshaped; + Tensor _weights_reshaped; + Tensor _gemm_output; + bool _is_first_run; +}; +} +#endif /* __ARM_COMPUTE_NELOCALLYCONNECTEDLAYER_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEMagnitude.h b/arm_compute/runtime/NEON/functions/NEMagnitude.h new file mode 100644 index 0000000000..6c1f988ef0 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEMagnitude.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEMAGNITUDE_H__ +#define __ARM_COMPUTE_NEMAGNITUDE_H__ + +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run NEMagnitudePhaseKernel */ +class NEMagnitude : public INESimpleFunction +{ +public: + /** Initialise the kernel's inputs. + * + * @param[in] input1 First tensor input. Data type supported: S16. + * @param[in] input2 Second tensor input. Data type supported: S16. + * @param[out] output Output tensor. Data type supported: S16. + * @param[in] use_fp16 (Optional) If true the FP16 kernels will be used. If false F32 kernels are used. + */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output, bool use_fp16 = false); +}; +} +#endif /*__ARM_COMPUTE_NEMAGNITUDE_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEMeanStdDev.h b/arm_compute/runtime/NEON/functions/NEMeanStdDev.h new file mode 100644 index 0000000000..3770b2a270 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEMeanStdDev.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEMEANSTDDEV_H__ +#define __ARM_COMPUTE_NEMEANSTDDEV_H__ + +#include "arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" + +#include + +namespace arm_compute +{ +/** Basic function to execute mean and std deviation. This function calls the following NEON kernels: + * + * @ref NEMeanStdDevKernel + * + */ +class NEMeanStdDev : public IFunction +{ +public: + /** Default Constructor. */ + NEMeanStdDev(); + /** Initialise the kernel's inputs and outputs. + * + * @param[in] input Input image. Data type supported: U8. + * @param[out] mean Output average pixel value. + * @param[out] stddev (Optional) Output standard deviation of pixel values. 
+ */ + void configure(const IImage *input, float *mean, float *stddev = nullptr); + + // Inherited methods overridden: + void run() override; + +private: + NEMeanStdDevKernel _mean_stddev_kernel; /**< Kernel that performs the mean and standard deviation calculation. */ + uint64_t _global_sum; /**< Variable that holds the global sum among calls in order to ease reduction */ + uint64_t _global_sum_squared; /**< Variable that holds the global sum of squared values among calls in order to ease reduction */ +}; +} +#endif /*__ARM_COMPUTE_NEMEANSTDDEV_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEMedian3x3.h b/arm_compute/runtime/NEON/functions/NEMedian3x3.h new file mode 100644 index 0000000000..a3df687a35 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEMedian3x3.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEMEDIAN3x3_H__ +#define __ARM_COMPUTE_NEMEDIAN3x3_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +#include + +namespace arm_compute +{ +class ITensor; + +/** Basic function to execute median filter. This function calls the following NEON kernels: + * + * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref NEMedian3x3Kernel + * + */ +class NEMedian3x3 : public INESimpleFunction +{ +public: + /** Initialise the function's source, destinations and border mode. + * + * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor, Data type supported: U8. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + * + */ + void configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value = 0); +}; +} +#endif /*__ARM_COMPUTE_NEMEDIAN3x3_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEMinMaxLocation.h b/arm_compute/runtime/NEON/functions/NEMinMaxLocation.h new file mode 100644 index 0000000000..82e75ee48b --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEMinMaxLocation.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEMINMAXLOCATION_H__ +#define __ARM_COMPUTE_NEMINMAXLOCATION_H__ + +#include "arm_compute/core/IArray.h" +#include "arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h" +#include "arm_compute/runtime/Array.h" +#include "arm_compute/runtime/IFunction.h" + +#include + +namespace arm_compute +{ +class ITensor; +using IImage = ITensor; + +/** Basic function to execute min and max location. This function calls the following NEON kernels: + * + * -# NEMinMaxKernel + * -# NEMinMaxLocationKernel + */ +class NEMinMaxLocation : public IFunction +{ +public: + /** Constructor */ + NEMinMaxLocation(); + /** Initialise the kernel's inputs and outputs. + * + * @param[in] input Input image. Data types supported: U8/S16. + * @param[out] min Minimum value of image. + * @param[out] max Maximum value of image. 
+ * @param[out] min_loc (Optional) Array of minimum value locations. + * @param[out] max_loc (Optional) Array of maximum value locations. + * @param[out] min_count (Optional) Number of minimum value encounters. + * @param[out] max_count (Optional) Number of maximum value encounters. + */ + void configure(const IImage *input, int32_t *min, int32_t *max, + ICoordinates2DArray *min_loc = nullptr, ICoordinates2DArray *max_loc = nullptr, + uint32_t *min_count = nullptr, uint32_t *max_count = nullptr); + + // Inherited methods overridden: + void run() override; + +private: + NEMinMaxKernel _min_max; /**< Kernel that performs min/max */ + NEMinMaxLocationKernel _min_max_loc; /**< Kernel that extracts min/max locations */ +}; +} +#endif /*__ARM_COMPUTE_NEMINMAXLOCATION_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NENonLinearFilter.h b/arm_compute/runtime/NEON/functions/NENonLinearFilter.h new file mode 100644 index 0000000000..d8a9eaebfb --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NENonLinearFilter.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NENONLINEARFILTER_H__ +#define __ARM_COMPUTE_NENONLINEARFILTER_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +#include + +namespace arm_compute +{ +class ITensor; + +/** Basic function to execute non linear filter. This function calls the following NEON kernels: + * + * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref NENonLinearFilterKernel + * + * @note Supported mask dimensions squares of sizes 3, 5 + */ +class NENonLinearFilter : public INESimpleFunction +{ +public: + /** Initialize the function's source, destination, conv and border_mode. + * + * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor. Data type supported: U8 + * @param[in] function Non linear function to perform + * @param[in] mask_size Mask size. Supported sizes: 3, 5 + * @param[in] pattern Mask pattern + * @param[in] mask The given mask. Will be used only if pattern is specified to PATTERN_OTHER + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
+ */ + void configure(ITensor *input, ITensor *output, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, BorderMode border_mode, + uint8_t constant_border_value = 0); +}; +} +#endif /*__ARM_COMPUTE_NENONLINEARFILTER_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h b/arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h new file mode 100644 index 0000000000..c87d722878 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NENONMAXIMASUPPRESSION3X3_H__ +#define __ARM_COMPUTE_NENONMAXIMASUPPRESSION3X3_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to execute non-maxima suppression over a 3x3 window. This function calls the following NEON kernels: + * + * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref NENonMaximaSuppression3x3Kernel + * + */ +class NENonMaximaSuppression3x3 : public INESimpleFunction +{ +public: + /** Initialise the function's source, destinations and border mode. + * + * @note The implementation supports just 2 border modes: UNDEFINED and CONSTANT + * The constant values used with CONSTANT border mode is 0 + * + * @param[in, out] input Source tensor. Data type supported: U8/F32. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination for the Non-Maxima suppressions 3x3. Data type supported: same as @p input + * @param[in] border_mode Border mode to use for non-maxima suppression. The implementation supports just 2 border modes: UNDEFINED and CONSTANT + * + */ + void configure(ITensor *input, ITensor *output, BorderMode border_mode); +}; +} +#endif /* __ARM_COMPUTE_NENONMAXIMASUPPRESSION3X3_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NENormalizationLayer.h b/arm_compute/runtime/NEON/functions/NENormalizationLayer.h new file mode 100644 index 0000000000..3202867c43 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NENormalizationLayer.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NENORMALIZATIONLAYER_H__ +#define __ARM_COMPUTE_NENORMALIZATIONLAYER_H__ + +#include "arm_compute/runtime/IFunction.h" + +#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h" +#include "arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h" +#include "arm_compute/runtime/Tensor.h" + +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to simulate a normalization layer. This function calls the following NEON kernels: + * + * -# @ref NEPixelWiseMultiplicationKernel + * -# @ref NEFillBorderKernel + * -# @ref NENormalizationLayerKernel + * + */ +class NENormalizationLayer : public IFunction +{ +public: + /** Default constructor */ + NENormalizationLayer(); + /** Set the input and output tensors. 
+ * + * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], + * and an optional 4th dimension for batch of inputs. Data type supported: QS8/F32 + * @param[out] output Destination with the same dimensions, data type and number of channels of @p input + * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters. + */ + void configure(const ITensor *input, ITensor *output, NormalizationLayerInfo norm_info); + + // Inherited methods overridden: + void run() override; + +private: + NENormalizationLayerKernel _norm_kernel; /**< Normalization layer kernel */ + NEPixelWiseMultiplicationKernel _multiply_kernel; /**< Pixel multiplication kernel */ + NEFillBorderKernel _border_handler; /**< Kernel to handle borders */ + Tensor _input_squared; /**< The intermediate buffer which stores results of squaring input */ +}; +} +#endif /* __ARM_COMPUTE_NENORMALIZATIONLAYER_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEOpticalFlow.h b/arm_compute/runtime/NEON/functions/NEOpticalFlow.h new file mode 100644 index 0000000000..0534551d19 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEOpticalFlow.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEOPTICALFLOW_H__ +#define __ARM_COMPUTE_NEOPTICALFLOW_H__ + +#include "arm_compute/core/IArray.h" +#include "arm_compute/core/NEON/kernels/NELKTrackerKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/Array.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/NEON/functions/NEScharr3x3.h" +#include "arm_compute/runtime/Tensor.h" + +#include +#include +#include + +namespace arm_compute +{ +class Pyramid; + +using LKInternalKeypointArray = Array; +/** Basic function to execute optical flow. This function calls the following NEON kernels and functions: + * + * -# @ref NEScharr3x3 + * -# @ref NELKTrackerKernel + * + */ +class NEOpticalFlow : public IFunction +{ +public: + /** Constructor */ + NEOpticalFlow(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEOpticalFlow(const NEOpticalFlow &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEOpticalFlow &operator=(const NEOpticalFlow &) = delete; + /** Initialise the function input and output + * + * @param[in] old_pyramid Pointer to the pyramid for the old tensor. Data type supported U8 + * @param[in] new_pyramid Pointer to the pyramid for the new tensor. 
+ Data type supported U8 + * @param[in] old_points Pointer to the IKeyPointArray storing old key points + * @param[in] new_points_estimates Pointer to the IKeyPointArray storing new estimates key points + * @param[out] new_points Pointer to the IKeyPointArray storing new key points + * @param[in] termination The criteria to terminate the search of each keypoint. + * @param[in] epsilon The error for terminating the algorithm + * @param[in] num_iterations The maximum number of iterations before terminating the algorithm + * @param[in] window_dimension The size of the window on which to perform the algorithm + * @param[in] use_initial_estimate The flag to indicate whether the initial estimated position should be used + * @param[in] border_mode The border mode applied at scharr kernel stage + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT + * + */ + void configure(const Pyramid *old_pyramid, const Pyramid *new_pyramid, const IKeyPointArray *old_points, const IKeyPointArray *new_points_estimates, + IKeyPointArray *new_points, Termination termination, float epsilon, unsigned int num_iterations, size_t window_dimension, + bool use_initial_estimate, BorderMode border_mode, uint8_t constant_border_value = 0); + + // Inherited methods overridden: + void run() override; + +private: + std::unique_ptr _func_scharr; + std::unique_ptr _kernel_tracker; + std::unique_ptr _scharr_gx; + std::unique_ptr _scharr_gy; + IKeyPointArray *_new_points; + const IKeyPointArray *_new_points_estimates; + const IKeyPointArray *_old_points; + LKInternalKeypointArray _new_points_internal; + LKInternalKeypointArray _old_points_internal; + unsigned int _num_levels; +}; +} +#endif /*__ARM_COMPUTE_NEOPTICALFLOW_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEPhase.h b/arm_compute/runtime/NEON/functions/NEPhase.h new file mode 100644 index 0000000000..985ba84c4c --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEPhase.h @@ 
-0,0 +1,46 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEPHASE_H__ +#define __ARM_COMPUTE_NEPHASE_H__ + +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run NEMagnitudePhaseKernel */ +class NEPhase : public INESimpleFunction +{ +public: + /** Initialise the kernel's inputs, output. + * + * @param[in] input1 First tensor input. Data type supported: S16. + * @param[in] input2 Second tensor input. Data type supported: S16. + * @param[out] output Output tensor. Data type supported: U8. 
+ */ + void configure(const ITensor *input1, const ITensor *input2, ITensor *output); +}; +} +#endif /*__ARM_COMPUTE_NEPHASE_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h b/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h new file mode 100644 index 0000000000..de7a797cd8 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+#ifndef __ARM_COMPUTE_NEPIXELWISEMULTIPLICATION_H__
+#define __ARM_COMPUTE_NEPIXELWISEMULTIPLICATION_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to run @ref NEPixelWiseMultiplicationKernel */
+class NEPixelWiseMultiplication : public INESimpleFunction
+{
+public:
+    /** Initialise the kernel's inputs, output and conversion policy.
+     *
+     * @param[in]  input1          First tensor input. Data types supported: U8/QS8/S16/F32.
+     * @param[in]  input2          Second tensor input. Data types supported: U8/QS8/S16/F32.
+     * @param[out] output          Output tensor. Data types supported: U8/QS8/S16/F32.
+     * @param[in]  scale           Scale to apply after multiplication. Must be positive.
+     * @param[in]  overflow_policy Overflow policy.
+     * @param[in]  rounding_policy Rounding policy.
+     */
+    void configure(const ITensor *input1, const ITensor *input2, ITensor *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy);
+};
+}
+#endif /*__ARM_COMPUTE_NEPIXELWISEMULTIPLICATION_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEPoolingLayer.h b/arm_compute/runtime/NEON/functions/NEPoolingLayer.h
new file mode 100644
index 0000000000..5a9cffa5ae
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEPoolingLayer.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEPOOLINGLAYER_H__ +#define __ARM_COMPUTE_NEPOOLINGLAYER_H__ + +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to simulate a pooling layer with the specified pooling operation. This function calls the following NEON kernels: + * + * -# @ref NEFillBorderKernel (executed if padding size is different from zero) + * -# @ref NEPoolingLayerKernel + */ +class NEPoolingLayer : public INESimpleFunction +{ +public: + /** Set the input and output tensors. + * + * @param[in, out] input Source tensor. (Written to only when padding != 0) Data types supported: QS8/F32. + * @param[out] output Destination tensor. Data types supported: Same as @p input. 
+ * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. + */ + void configure(ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info); +}; +} +#endif /* __ARM_COMPUTE_NEPOOLINGLAYER_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NERemap.h b/arm_compute/runtime/NEON/functions/NERemap.h new file mode 100644 index 0000000000..b1ec559817 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NERemap.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEREMAP_H__ +#define __ARM_COMPUTE_NEREMAP_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/Tensor.h" + +#include + +namespace arm_compute +{ +class ITensor; + +/** Basic function to execute remap. 
This function calls the following NEON kernels: + * + * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref NERemapKernel + */ +class NERemap : public INESimpleFunction +{ +public: + /** Initialise the function's sources, destination, interpolation policy and border mode. + * + * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[in] map_x Map for X coordinates. Data type supported: F32. + * @param[in] map_y Map for Y coordinates. Data type supported: F32. + * @param[out] output Output tensor. Data type supported: U8. + * @param[in] policy Interpolation policy to use. Only NEAREST and BILINEAR are supported. + * @param[in] border_mode Border mode to use on the input tensor. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + * + */ + void configure(ITensor *input, const ITensor *map_x, const ITensor *map_y, ITensor *output, + InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0); +}; +} +#endif /*__ARM_COMPUTE_NEREMAP_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEScale.h b/arm_compute/runtime/NEON/functions/NEScale.h new file mode 100644 index 0000000000..e1da891dcf --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEScale.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NESCALEIMAGE_H__ +#define __ARM_COMPUTE_NESCALEIMAGE_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/Tensor.h" + +#include + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run @ref NEScaleKernel */ +class NEScale : public INESimpleFunction +{ +public: + /** Constructor + * + * Initialize NEScale + */ + NEScale(); + /** Initialize the function's source, destination, interpolation type and border_mode. + * + * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor. Data type supported: U8. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. 
+ * @param[in] policy The interpolation type. + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ITensor *input, ITensor *output, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0); + +private: + Tensor _offsets; /**< Offset to access the element with NEAREST interpolation or the top-left element with BILINEAR interpolation in the input tensor */ + Tensor _dx; /**< Element's distance between the X real coordinate and the smallest X following integer */ + Tensor _dy; /**< Element's distance between the Y real coordinate and the smallest Y following integer */ +}; +} +#endif /*__ARM_COMPUTE_NESCALEIMAGE_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEScharr3x3.h b/arm_compute/runtime/NEON/functions/NEScharr3x3.h new file mode 100644 index 0000000000..db24723902 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEScharr3x3.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NESCHARR3x3_H__ +#define __ARM_COMPUTE_NESCHARR3x3_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +#include + +namespace arm_compute +{ +class ITensor; + +/** Basic function to execute scharr 3x3 filter. This function calls the following NEON kernels: + * + * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref NEScharr3x3Kernel + * + */ +class NEScharr3x3 : public INESimpleFunction +{ +public: + /** Initialise the function's source, destinations and border mode. + * + * @note At least one of output_x or output_y must be not NULL. + * + * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output_x (optional) Destination for the Scharr 3x3 convolution along the X axis. Data type supported: S16. + * @param[out] output_y (optional) Destination for the Scharr 3x3 convolution along the Y axis. Data type supported: S16. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + * + */ + void configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0); +}; +} +#endif /*__ARM_COMPUTE_NESCHARR3x3_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NESobel3x3.h b/arm_compute/runtime/NEON/functions/NESobel3x3.h new file mode 100644 index 0000000000..e2896ba058 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NESobel3x3.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NESOBEL3x3_H__ +#define __ARM_COMPUTE_NESOBEL3x3_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +#include + +namespace arm_compute +{ +class ITensor; + +/** Basic function to execute sobel 3x3 filter. This function calls the following NEON kernels: + * + * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref NESobel3x3Kernel + * + */ +class NESobel3x3 : public INESimpleFunction +{ +public: + /** Initialise the function's source, destinations and border mode. + * + * @note At least one of output_x or output_y must be not NULL. + * + * @param[in, out] input Source tensor. Data type supported: U8. 
(Written to only for @p border_mode != UNDEFINED) + * @param[out] output_x (optional) Destination for the Sobel 3x3 convolution along the X axis. Data type supported: S16. + * @param[out] output_y (optional) Destination for the Sobel 3x3 convolution along the Y axis. Data type supported: S16. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + * + */ + void configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0); +}; +} +#endif /*__ARM_COMPUTE_NESOBEL3x3_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NESobel5x5.h b/arm_compute/runtime/NEON/functions/NESobel5x5.h new file mode 100644 index 0000000000..fc4d665a70 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NESobel5x5.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NESOBEL5x5_H__ +#define __ARM_COMPUTE_NESOBEL5x5_H__ + +#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "arm_compute/core/NEON/kernels/NESobel5x5Kernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/Tensor.h" + +#include + +namespace arm_compute +{ +class ITensor; + +/** Basic function to execute sobel 5x5 filter. This function calls the following NEON kernels: + * + * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref NESobel5x5HorKernel + * -# @ref NESobel5x5VertKernel + * + */ +class NESobel5x5 : public IFunction +{ +public: + /** Default constructor */ + NESobel5x5(); + /** Initialise the function's source, destinations and border mode. + * + * @note At least one of output_x or output_y must be not NULL. + * + * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output_x (optional) Destination for the Sobel 5x5 convolution along the X axis. Data type supported: S16. + * @param[out] output_y (optional) Destination for the Sobel 5x5 convolution along the Y axis. Data type supported: S16. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
+ * + */ + void configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0); + + // Inherited methods overridden: + void run() override; + +protected: + NESobel5x5HorKernel _sobel_hor; /**< Sobel Horizontal 5x5 kernel */ + NESobel5x5VertKernel _sobel_vert; /**< Sobel Vertical 5x5 kernel */ + Tensor _tmp_x; /**< Temporary buffer for Sobel X */ + Tensor _tmp_y; /**< Temporary buffer for Sobel Y */ + NEFillBorderKernel _border_handler; /**< Kernel to handle tensor borders */ +}; +} +#endif /*__ARM_COMPUTE_NESOBEL5x5_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NESobel7x7.h b/arm_compute/runtime/NEON/functions/NESobel7x7.h new file mode 100644 index 0000000000..06b7c80ad6 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NESobel7x7.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NESOBEL7x7_H__ +#define __ARM_COMPUTE_NESOBEL7x7_H__ + +#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "arm_compute/core/NEON/kernels/NESobel7x7Kernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/Tensor.h" + +#include + +namespace arm_compute +{ +class ITensor; + +/** Basic function to execute sobel 7x7 filter. This function calls the following NEON kernels: + * + * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) + * -# @ref NESobel7x7HorKernel + * -# @ref NESobel7x7VertKernel + * + */ +class NESobel7x7 : public IFunction +{ +public: + /** Default constructor */ + NESobel7x7(); + /** Initialise the function's source, destinations and border mode. + * + * @note At least one of output_x or output_y must be not NULL. + * + * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output_x (optional) Destination for the Sobel 7x7 convolution along the X axis. Data type supported: S32. + * @param[out] output_y (optional) Destination for the Sobel 7x7 convolution along the Y axis. Data type supported: S32. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
+ * + */ + void configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0); + + // Inherited methods overridden: + void run() override; + +protected: + NESobel7x7HorKernel _sobel_hor; /**< Sobel Horizontal 7x7 kernel */ + NESobel7x7VertKernel _sobel_vert; /**< Sobel Vertical 7x7 kernel */ + Tensor _tmp_x; /**< Temporary buffer for Sobel X */ + Tensor _tmp_y; /**< Temporary buffer for Sobel Y */ + NEFillBorderKernel _border_handler; /**< Kernel to handle tensor borders */ +}; +} +#endif /*__ARM_COMPUTE_NESOBEL7x7_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h new file mode 100644 index 0000000000..dc84dec0e4 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NESOFTMAXLAYER_H__ +#define __ARM_COMPUTE_NESOFTMAXLAYER_H__ + +#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/Tensor.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to compute a SoftmaxLayer. + * + * Softmax is calculated by : + * @f[ out = \frac{e^{x - max(x)}}{\sum{e^{x - max(x)}}} @f] + * + * This function runs the following kernels: + * -# @ref NELogits1DMaxKernel + * -# @ref NELogits1DShiftExpSumKernel + * -# @ref NELogits1DNormKernel + */ +class NESoftmaxLayer : public IFunction +{ +public: + /** Constructor */ + NESoftmaxLayer(); + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: QS8/F32. + * @param[out] output Destination tensor. Data types supported: same as @p input. + */ + void configure(ITensor *input, ITensor *output); + + // Inherited methods overridden: + void run() override; + +private: + NELogits1DMaxKernel _max_kernel; + NELogits1DShiftExpSumKernel _shift_exp_sum_kernel; + NELogits1DNormKernel _norm_kernel; + NEFillBorderKernel _fill_border_kernel; + Tensor _max; + Tensor _sum; + Tensor _tmp; +}; +} +#endif /* __ARM_COMPUTE_NESOFTMAXLAYER_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NETableLookup.h b/arm_compute/runtime/NEON/functions/NETableLookup.h new file mode 100644 index 0000000000..b59ffb877c --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NETableLookup.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NETABLELOOKUP_H__ +#define __ARM_COMPUTE_NETABLELOOKUP_H__ + +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +namespace arm_compute +{ +class ITensor; +class ILut; + +/** Basic function to run @ref NETableLookupKernel */ +class NETableLookup : public INESimpleFunction +{ +public: + /** Initialise the kernel's inputs and output + * + * @param[in] input First tensor input. Data types supported: U8/S16 + * @param[in] lut Input lookup table. + * @param[out] output Output tensor. 
Data types supported: same as @p input + */ + void configure(const ITensor *input, const ILut *lut, ITensor *output); +}; +} +#endif /*__ARM_COMPUTE_NETABLELOOKUP_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEThreshold.h b/arm_compute/runtime/NEON/functions/NEThreshold.h new file mode 100644 index 0000000000..d407ee5b15 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEThreshold.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+#ifndef __ARM_COMPUTE_NETHRESHOLD_H__
+#define __ARM_COMPUTE_NETHRESHOLD_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to run @ref NEThresholdKernel */
+class NEThreshold : public INESimpleFunction
+{
+public:
+    /** Initialise the function's source, destination, thresholds and threshold type
+     *
+     * @param[in]  input       First tensor input. Data type supported: U8.
+     * @param[out] output      Output tensor. Data type supported: U8.
+     * @param[in]  threshold   Threshold. If upper threshold is specified, this will be used as the lower threshold
+     * @param[in]  false_value Value to assign when the condition is false
+     * @param[in]  true_value  Value to assign when the condition is true
+     * @param[in]  type        Thresholding type. Can either be BINARY or RANGE.
+     * @param[in]  upper       Upper threshold. Only used with RANGE thresholding
+     */
+    void configure(const ITensor *input, ITensor *output, uint8_t threshold, uint8_t false_value = 0, uint8_t true_value = 0,
+                   ThresholdType type = ThresholdType::BINARY, uint8_t upper = 0);
+};
+}
+#endif /*__ARM_COMPUTE_NETHRESHOLD_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NETranspose.h b/arm_compute/runtime/NEON/functions/NETranspose.h
new file mode 100644
index 0000000000..4b606e7282
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NETranspose.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NETRANSPOSE_H__ +#define __ARM_COMPUTE_NETRANSPOSE_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to transpose a matrix on NEON. This function calls the following NEON kernel: + * + * -# @ref NETransposeKernel + * + */ +class NETranspose : public INESimpleFunction +{ +public: + /** Initialise the kernel's inputs and output + * + * @param[in] input Input tensor. Data types supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32 + * @param[out] output Output tensor. 
Data type supported: Same as @p input + */ + void configure(const ITensor *input, ITensor *output); +}; +} + +#endif /* __ARM_COMPUTE_NETRANSPOSE_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEWarpAffine.h b/arm_compute/runtime/NEON/functions/NEWarpAffine.h new file mode 100644 index 0000000000..f8eebe8d2a --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEWarpAffine.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEWARPAFFINE_H__ +#define __ARM_COMPUTE_NEWARPAFFINE_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +#include + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run @ref NEWarpAffineKernel */ +class NEWarpAffine : public INESimpleFunction +{ +public: + /** Initialize the function's source, destination, interpolation policy and border_mode. + * + * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor. Data type supported: U8 + * @param[in] matrix The perspective matrix. Must be 2x3 of type float. + * @param[in] policy The interpolation type. + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ITensor *input, ITensor *output, const float *matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0); +}; +} +#endif /*__ARM_COMPUTE_NEWARPAFFINE_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEWarpPerspective.h b/arm_compute/runtime/NEON/functions/NEWarpPerspective.h new file mode 100644 index 0000000000..d0699291b1 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEWarpPerspective.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEWARPPERSPECTIVE_H__ +#define __ARM_COMPUTE_NEWARPPERSPECTIVE_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +#include + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run @ref NEWarpPerspectiveKernel */ +class NEWarpPerspective : public INESimpleFunction +{ +public: + /** Initialize the function's source, destination, interpolation policy and border_mode. + * + * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor. Data type supported: U8 + * @param[in] matrix The perspective matrix. Must be 3x3 of type float. + * @param[in] policy The interpolation type. + * @param[in] border_mode Strategy to use for borders. 
+ * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(ITensor *input, ITensor *output, const float *matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0); +}; +} +#endif /*__ARM_COMPUTE_NEWARPPERSPECTIVE_H__ */ diff --git a/arm_compute/runtime/OMP/OMPScheduler.h b/arm_compute/runtime/OMP/OMPScheduler.h new file mode 100644 index 0000000000..21df6a699d --- /dev/null +++ b/arm_compute/runtime/OMP/OMPScheduler.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_OMPSCHEDULER_H__ +#define __ARM_COMPUTE_OMPSCHEDULER_H__ + +#include "arm_compute/runtime/IScheduler.h" + +namespace arm_compute +{ +/** Pool of threads to automatically split a kernel's execution among several threads. 
*/ +class OMPScheduler : public IScheduler +{ +public: + /** Sets the number of threads the scheduler will use to run the kernels. + * + * @param[in] num_threads If set to 0, then the number returned by omp_get_max_threads() will be used, otherwise the number of threads specified. + */ + void set_num_threads(unsigned int num_threads) override; + /** Returns the number of threads that the OMPScheduler has in its pool. + * + * @return Number of threads available in OMPScheduler. + */ + unsigned int num_threads() const override; + /** Access the scheduler singleton + * + * @return The scheduler + */ + static OMPScheduler &get(); + /** Multithread the execution of the passed kernel if possible. + * + * The kernel will run on a single thread if any of these conditions is true: + * - ICPPKernel::is_parallelisable() returns false + * - The scheduler has been initialized with only one thread. + * + * @param[in] kernel Kernel to execute. + * @param[in] split_dimension Dimension along which to split the kernel's execution window. + */ + void schedule(ICPPKernel *kernel, unsigned int split_dimension) override; + +private: + /** Constructor. */ + OMPScheduler(); + + unsigned int _num_threads; +}; +} +#endif /* __ARM_COMPUTE_OMPSCHEDULER_H__ */ diff --git a/arm_compute/runtime/Pyramid.h b/arm_compute/runtime/Pyramid.h new file mode 100644 index 0000000000..2e7613759f --- /dev/null +++ b/arm_compute/runtime/Pyramid.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_PYRAMID_H__ +#define __ARM_COMPUTE_PYRAMID_H__ + +#include "arm_compute/core/IPyramid.h" +#include "arm_compute/core/PyramidInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/Tensor.h" + +#include +#include + +namespace arm_compute +{ +class Tensor; + +/** Basic implementation of the pyramid interface */ +class Pyramid : public IPyramid +{ +public: + /** Initialize pyramid data-object using the given Pyramid's metadata + * + * @param[in] info Pyramid's metadata + */ + void init(const PyramidInfo &info); + + /** Initialize pyramid data-object using the given Pyramid's metadata + * + * @note Uses conservative padding strategy which fits all kernels. 
+ * + * @param[in] info Pyramid's metadata + */ + void init_auto_padding(const PyramidInfo &info); + + /** Allocate the planes in the pyramid */ + void allocate(); + + // Inherited method overridden + const PyramidInfo *info() const override; + Tensor *get_pyramid_level(size_t index) const override; + +private: + /** Initialize pyramid data-object using the given Pyramid's metadata + * + * @param[in] info Pyramid's metadata + * @param[in] auto_padding Specifies whether the image in the pyramid use auto padding + */ + void internal_init(const PyramidInfo &info, bool auto_padding); + + PyramidInfo _info{}; + std::unique_ptr _pyramid{ nullptr }; +}; +} +#endif /*__ARM_COMPUTE_PYRAMID_H__ */ diff --git a/arm_compute/runtime/Scheduler.h b/arm_compute/runtime/Scheduler.h new file mode 100644 index 0000000000..21f944b75f --- /dev/null +++ b/arm_compute/runtime/Scheduler.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_SCHEDULER_H__ +#define __ARM_COMPUTE_SCHEDULER_H__ + +#include "arm_compute/runtime/IScheduler.h" +#include + +namespace arm_compute +{ +/** Configurable scheduler which supports multiple multithreading APIs and choosing between different schedulers at runtime. */ +class Scheduler +{ +public: + enum class Type + { + ST, // Single thread. + CPP, // C++11 threads. + OMP, // OpenMP. + CUSTOM // Provided by the user. + }; + /** Sets the user defined scheduler and makes it the active scheduler. + * + * @param[in] scheduler A shared pointer to a custom scheduler implemented by the user. + */ + static void set(std::shared_ptr &scheduler); + /** Access the scheduler singleton. + * + * @return A reference to the scheduler object. + */ + static IScheduler &get(); + /** Set the active scheduler. + * + * Only one scheduler can be enabled at any time. + * + * @param[in] t the type of the scheduler to be enabled. + */ + static void set(Type t); + /** Returns the type of the active scheduler. + * + * @return The current scheduler's type. + */ + static Type get_type(); + /** Returns true if the given scheduler type is supported. False otherwise. + * + * @return true if the given scheduler type is supported. False otherwise. + */ + static bool is_available(Type t); + +private: + static Type _scheduler_type; + static std::shared_ptr _custom_scheduler; + Scheduler(); +}; +} +#endif /* __ARM_COMPUTE_SCHEDULER_H__ */ diff --git a/arm_compute/runtime/SingleThreadScheduler.h b/arm_compute/runtime/SingleThreadScheduler.h new file mode 100644 index 0000000000..a6e1defe7c --- /dev/null +++ b/arm_compute/runtime/SingleThreadScheduler.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_SINGLETHREADSCHEDULER_H__ +#define __ARM_COMPUTE_SINGLETHREADSCHEDULER_H__ + +#include "arm_compute/runtime/IScheduler.h" + +namespace arm_compute +{ +/** Pool of threads to automatically split a kernel's execution among several threads. */ +class SingleThreadScheduler : public IScheduler +{ +public: + /** Sets the number of threads the scheduler will use to run the kernels. + * + * @param[in] num_threads This is ignored for this scheduler as the number of threads is always one. + */ + void set_num_threads(unsigned int num_threads) override; + /** Returns the number of threads that the SingleThreadScheduler has, which is always 1. + * + * @return Number of threads available in SingleThreadScheduler. 
+ */ + unsigned int num_threads() const override; + /** Access the scheduler singleton + * + * @return The scheduler + */ + static SingleThreadScheduler &get(); + /** Runs the kernel in the same thread as the caller synchronously. + * + * @param[in] kernel Kernel to execute. + * @param[in] split_dimension Dimension along which to split the kernel's execution window. + */ + void schedule(ICPPKernel *kernel, unsigned int split_dimension) override; + +private: + /** Constructor. */ + SingleThreadScheduler() = default; +}; +} +#endif /* __ARM_COMPUTE_SINGLETHREADSCHEDULER_H__ */ diff --git a/arm_compute/runtime/SubTensor.h b/arm_compute/runtime/SubTensor.h new file mode 100644 index 0000000000..bdb229de49 --- /dev/null +++ b/arm_compute/runtime/SubTensor.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_SUBTENSOR_H__ +#define __ARM_COMPUTE_SUBTENSOR_H__ + +#include "arm_compute/core/SubTensorInfo.h" +#include "arm_compute/runtime/Tensor.h" + +#include + +namespace arm_compute +{ +class ITensorInfo; + +/** Basic implementation of the sub-tensor interface */ +class SubTensor : public ITensor +{ +public: + /** Constructor + * + * @param[in] parent Parent tensor + * @param[in] tensor_shape Shape of the subtensor + * @param[in] coords Coordinates of the first subtensor element inside the parent tensor. + */ + SubTensor(ITensor *parent, const TensorShape &tensor_shape, const Coordinates &coords); + /** Destructor: free the tensor's memory */ + ~SubTensor() = default; + /** Restrict instances of this class to be copy constructed */ + SubTensor(const SubTensor &) = delete; + /** Restrict instances of this class to be copied */ + SubTensor &operator=(const SubTensor &) = delete; + /** Allow instances of this class to be move constructed */ + SubTensor(SubTensor &&) = default; + /** Allow instances of this class to be moved */ + SubTensor &operator=(SubTensor &&) = default; + /** Return the parent tensor of the subtensor + * + * @return Parent tensor + */ + ITensor *parent(); + + // Inherited methods overridden: + ITensorInfo *info() const override; + ITensorInfo *info() override; + uint8_t *buffer() const override; + +private: + ITensor *_parent; + mutable SubTensorInfo _info; +}; +} +#endif /*__ARM_COMPUTE_SUBTENSOR_H__ */ diff --git a/arm_compute/runtime/Tensor.h b/arm_compute/runtime/Tensor.h new file mode 100644 index 0000000000..1fe73a2353 --- /dev/null +++ b/arm_compute/runtime/Tensor.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_TENSOR_H__ +#define __ARM_COMPUTE_TENSOR_H__ + +#include "arm_compute/core/ITensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include + +namespace arm_compute +{ +class ITensorInfo; + +/** Basic implementation of the tensor interface */ +class Tensor : public ITensor +{ +public: + /** Constructor */ + Tensor(); + /** Destructor: free the tensor's memory */ + ~Tensor() = default; + /** Allow instances of this class to be move constructed */ + Tensor(Tensor &&) = default; + /** Allow instances of this class to be moved */ + Tensor &operator=(Tensor &&) = default; + /** Return a pointer to the tensor's allocator + * + * @return A pointer to the tensor's allocator + */ + TensorAllocator *allocator(); + + // Inherited methods overridden: + ITensorInfo *info() const override; + ITensorInfo *info() override; + uint8_t *buffer() const override; + +private: + mutable TensorAllocator _allocator; /**< Instance of the basic CPU allocator.*/ +}; + +using Image = Tensor; +} +#endif /*__ARM_COMPUTE_TENSOR_H__ */ diff --git a/arm_compute/runtime/TensorAllocator.h b/arm_compute/runtime/TensorAllocator.h new file mode 100644 index 0000000000..450323b3ab --- /dev/null +++ b/arm_compute/runtime/TensorAllocator.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_TENSORALLOCATOR_H__ +#define __ARM_COMPUTE_TENSORALLOCATOR_H__ + +#include "arm_compute/runtime/ITensorAllocator.h" + +#include +#include +#include + +namespace arm_compute +{ +class Coordinates; +class TensorInfo; + +/** Basic implementation of a CPU memory tensor allocator. */ +class TensorAllocator : public ITensorAllocator +{ +public: + /** Default constructor. */ + TensorAllocator(); + + /** Make ITensorAllocator's init methods available */ + using ITensorAllocator::init; + + /** Shares the same backing memory with another tensor allocator, while the tensor info might be different. + * In other words this can be used to create a sub-tensor from another tensor while sharing the same memory. + * + * @note TensorAllocator have to be of the same specialized type. + * + * @param[in] allocator The allocator that owns the backing memory to be shared. Ownership becomes shared afterwards. + * @param[in] coords The starting coordinates of the new tensor inside the parent tensor. + * @param[in] sub_info The new tensor information (e.g. shape etc) + */ + void init(const TensorAllocator &allocator, const Coordinates &coords, TensorInfo sub_info); + + /** Returns the pointer to the allocated data. */ + uint8_t *data() const; + + /** Allocate size specified by TensorInfo of CPU memory. + * + * @note The tensor must not already be allocated when calling this function. + * + */ + void allocate() override; + + /** Free allocated CPU memory. 
+ * + * @note The tensor must have been allocated when calling this function. + * + */ + void free() override; + +protected: + /** No-op for CPU memory + * + * @return A pointer to the beginning of the tensor's allocation. + */ + uint8_t *lock() override; + + /** No-op for CPU memory. */ + void unlock() override; + +private: + std::shared_ptr> _buffer; /**< CPU memory allocation. */ +}; +} +#endif /* __ARM_COMPUTE_TENSORALLOCATOR_H__ */ diff --git a/arm_compute/runtime/Utils.h b/arm_compute/runtime/Utils.h new file mode 100644 index 0000000000..2f037a0621 --- /dev/null +++ b/arm_compute/runtime/Utils.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_RUNTIME_UTILS_H__ +#define __ARM_COMPUTE_RUNTIME_UTILS_H__ + +#include "arm_compute/runtime/Scheduler.h" + +#include + +namespace arm_compute +{ +/** Convert a Scheduler::Type into a string. + * + * @param[in] t @ref Scheduler::Type to be translated to string. + * + * @return The string describing the scheduler type. + */ +const std::string &string_from_scheduler_type(Scheduler::Type t); +} +#endif /* __ARM_COMPUTE_RUNTIME_UTILS_H__ */ diff --git a/data b/data new file mode 160000 index 0000000000..1f4578a90c --- /dev/null +++ b/data @@ -0,0 +1 @@ +Subproject commit 1f4578a90cde937d510198fc0926adf42a814409 diff --git a/docs/00_introduction.dox b/docs/00_introduction.dox new file mode 100644 index 0000000000..f84e64e9a0 --- /dev/null +++ b/docs/00_introduction.dox @@ -0,0 +1,514 @@ +/** @mainpage Introduction + +@tableofcontents + +The Computer Vision and Machine Learning library is a set of functions optimised for both ARM CPUs and GPUs using SIMD technologies. + +Several builds of the library are available using various configurations: + - OS: Linux, Android or bare metal. + - Architecture: armv7a (32bit) or arm64-v8a (64bit) + - Technology: NEON / OpenCL / NEON and OpenCL + - Debug / Asserts / Release: Use a build with asserts enabled to debug your application and enable extra validation. Once you are sure your application works as expected you can switch to a release build of the library for maximum performance. + +@section S0_1_contact Contact / Support + +Please email developer@arm.com + +In order to facilitate the work of the support team please provide the build information of the library you are using. 
To get the version of the library you are using simply run: + + $ strings android-armv7a-cl-asserts/libarm_compute.so | grep arm_compute_version + arm_compute_version=v16.12 Build options: {'embed_kernels': '1', 'opencl': '1', 'arch': 'armv7a', 'neon': '0', 'asserts': '1', 'debug': '0', 'os': 'android', 'Werror': '1'} Git hash=f51a545d4ea12a9059fe4e598a092f1fd06dc858 + +@section S1_file_organisation File organisation + +This archive contains: + - The arm_compute header and source files + - The latest Khronos OpenCL 1.2 C headers from the Khronos OpenCL registry + - The latest Khronos cl2.hpp from the Khronos OpenCL registry (API version 2.1 when this document was written) + - The sources for a stub version of libOpenCL.so to help you build your application. + - An examples folder containing a few examples to compile and link against the library. + - A @ref utils folder containing headers with some boiler plate code used by the examples. + - This documentation. + +You should have the following file organisation: + + . + ├── arm_compute --> All the arm_compute headers + │   ├── core + │   │   ├── CL + │   │   │   ├── CLKernels.h --> Includes all the OpenCL kernels at once + │   │   │   ├── CL specialisation of all the generic objects interfaces (ICLTensor, ICLImage, etc.) + │   │   │   ├── kernels --> Folder containing all the OpenCL kernels + │   │   │   │   └── CL*Kernel.h + │   │   │   └── OpenCL.h --> Wrapper to configure the Khronos OpenCL C++ header + │   │ ├── CPP + │   │ │   └── kernels --> Folder containing all the CPP kernels + │   │   │   │   └── CPP*Kernel.h + │   │   ├── NEON + │   │   │   ├── kernels --> Folder containing all the NEON kernels + │   │   │   │   └── NE*Kernel.h + │   │   │   └── NEKernels.h --> Includes all the NEON kernels at once + │   │   ├── All common basic types (Types.h, Window, Coordinates, Iterator, etc.) + │   │   ├── All generic objects interfaces (ITensor, IImage, etc.) 
+ │   │   └── Objects metadata classes (ImageInfo, TensorInfo, MultiImageInfo) + │   └── runtime + │   ├── CL + │   │   ├── CL objects & allocators (CLArray, CLImage, CLTensor, etc.) + │   │   ├── functions --> Folder containing all the OpenCL functions + │   │   │   └── CL*.h + │   │   └── CLFunctions.h --> Includes all the OpenCL functions at once + │   ├── CPP + │   │   └── Scheduler.h --> Basic pool of threads to execute CPP/NEON code on several cores in parallel + │   ├── NEON + │   │ ├── functions --> Folder containing all the NEON functions + │   │ │   └── NE*.h + │   │ └── NEFunctions.h --> Includes all the NEON functions at once + │   └── Basic implementations of the generic object interfaces (Array, Image, Tensor, etc.) + ├── documentation + │   ├── index.xhtml + │   └── ... + ├── documentation.xhtml -> documentation/index.xhtml + ├── examples + │   ├── cl_convolution.cpp + │   ├── neoncl_scale_median_gaussian.cpp + │   ├── neon_convolution.cpp + │   └── neon_scale.cpp + ├── include + │   └── CL + │   └── Khronos OpenCL C headers and C++ wrapper + ├── opencl-1.2-stubs + │ └── opencl_stubs.c + ├── src + │   ├── core + │ │ └── ... (Same structure as headers) + │   │ └── CL + │   │ └── cl_kernels --> All the OpenCL kernels + │ └── runtime + │ └── ... 
(Same structure as headers) + ├── tests + │   ├── All test related files shared between validation and benchmark + │   ├── CL --> OpenCL specific files (shared) + │   ├── NEON --> NEON specific files (shared) + │   ├── benchmark --> Sources for benchmarking + │ │ ├── Benchmark specific files + │ │ ├── main.cpp --> Entry point for benchmark test framework + │ │ ├── CL --> OpenCL benchmarking tests + │ │ └── NEON --> NEON benchmarking tests + │   ├── validation --> Sources for validation + │ │ ├── Validation specific files + │ │ ├── main.cpp --> Entry point for validation test framework + │ │ ├── CL --> OpenCL validation tests + │ │ ├── NEON --> NEON validation tests + │ │ └── UNIT --> Library validation tests + │   └── dataset --> Datasets defining common sets of input parameters + └── utils --> Boiler plate code used by examples + └── Utils.h + +@section S2_versions_changelog Release versions and changelog + +@subsection S2_1_versions Release versions + +All releases are numbered vYY.MM Where YY are the last two digits of the year, and MM the month number. +If there is more than one release in a month then an extra sequential number is appended at the end: + + v17.03 (First release of March 2017) + v17.03.1 (Second release of March 2017) + v17.04 (First release of April 2017) + +@note We're aiming at releasing one major public release with new features per quarter. All releases in between will only contain bug fixes. + +@subsection S2_2_changelog Changelog + +v17.06 Public major release + - Various bug fixes + - Added support for fixed point 8 bit (QS8) to the various NEON machine learning kernels. + - Added unit tests and benchmarks (AlexNet, LeNet) + - Added support for sub tensors. + - Added infrastructure to provide GPU specific optimisation for some OpenCL kernels. 
+ - Added @ref arm_compute::OMPScheduler (OpenMP) scheduler for NEON + - Added @ref arm_compute::SingleThreadScheduler scheduler for NEON (For bare metal) + - User can specify his own scheduler by implementing the @ref arm_compute::IScheduler interface. + - New OpenCL kernels / functions: + - @ref arm_compute::CLBatchNormalizationLayerKernel / @ref arm_compute::CLBatchNormalizationLayer + - @ref arm_compute::CLDepthConcatenateKernel / @ref arm_compute::CLDepthConcatenate + - @ref arm_compute::CLHOGOrientationBinningKernel @ref arm_compute::CLHOGBlockNormalizationKernel, @ref arm_compute::CLHOGDetectorKernel / @ref arm_compute::CLHOGDescriptor @ref arm_compute::CLHOGDetector @ref arm_compute::CLHOGGradient @ref arm_compute::CLHOGMultiDetection + - @ref arm_compute::CLLocallyConnectedMatrixMultiplyKernel / @ref arm_compute::CLLocallyConnectedLayer + - @ref arm_compute::CLWeightsReshapeKernel / @ref arm_compute::CLConvolutionLayerReshapeWeights + - New C++ kernels: + - @ref arm_compute::CPPDetectionWindowNonMaximaSuppressionKernel + - New NEON kernels / functions: + - @ref arm_compute::NEBatchNormalizationLayerKernel / @ref arm_compute::NEBatchNormalizationLayer + - @ref arm_compute::NEDepthConcatenateKernel / @ref arm_compute::NEDepthConcatenate + - @ref arm_compute::NEDirectConvolutionLayerKernel / @ref arm_compute::NEDirectConvolutionLayer + - @ref arm_compute::NELocallyConnectedMatrixMultiplyKernel / @ref arm_compute::NELocallyConnectedLayer + - @ref arm_compute::NEWeightsReshapeKernel / @ref arm_compute::NEConvolutionLayerReshapeWeights + +v17.05 Public bug fixes release + - Various bug fixes + - Remaining of the functions ported to use accurate padding. + - Library does not link against OpenCL anymore (It uses dlopen / dlsym at runtime instead to determine whether or not OpenCL is available). + - Added "free" method to allocator. 
+ - Minimum version of g++ required for armv7 Linux changed from 4.8 to 4.9 + +v17.04 Public bug fixes release + + The following functions have been ported to use the new accurate padding: + - @ref arm_compute::CLColorConvertKernel + - @ref arm_compute::CLEdgeNonMaxSuppressionKernel + - @ref arm_compute::CLEdgeTraceKernel + - @ref arm_compute::CLGaussianPyramidHorKernel + - @ref arm_compute::CLGaussianPyramidVertKernel + - @ref arm_compute::CLGradientKernel + - @ref arm_compute::NEChannelCombineKernel + - @ref arm_compute::NEFillArrayKernel + - @ref arm_compute::NEGaussianPyramidHorKernel + - @ref arm_compute::NEGaussianPyramidVertKernel + - @ref arm_compute::NEHarrisScoreFP16Kernel + - @ref arm_compute::NEHarrisScoreKernel + - @ref arm_compute::NEHOGDetectorKernel + - @ref arm_compute::NELogits1DMaxKernel + - @ref arm_compute::NELogits1DShiftExpSumKernel + - @ref arm_compute::NELogits1DNormKernel + - @ref arm_compute::NENonMaximaSuppression3x3FP16Kernel + - @ref arm_compute::NENonMaximaSuppression3x3Kernel + + +v17.03.1 First Major public release of the sources + - Renamed the library to arm_compute + - New CPP target introduced for C++ kernels shared between NEON and CL functions. + - New padding calculation interface introduced and ported most kernels / functions to use it. 
+ - New OpenCL kernels / functions: + - @ref arm_compute::CLGEMMLowpMatrixMultiplyKernel / @ref arm_compute::CLGEMMLowp + - New NEON kernels / functions: + - @ref arm_compute::NENormalizationLayerKernel / @ref arm_compute::NENormalizationLayer + - @ref arm_compute::NETransposeKernel / @ref arm_compute::NETranspose + - @ref arm_compute::NELogits1DMaxKernel, @ref arm_compute::NELogits1DShiftExpSumKernel, @ref arm_compute::NELogits1DNormKernel / @ref arm_compute::NESoftmaxLayer + - @ref arm_compute::NEIm2ColKernel, @ref arm_compute::NECol2ImKernel, arm_compute::NEConvolutionLayerWeightsReshapeKernel / @ref arm_compute::NEConvolutionLayer + - @ref arm_compute::NEGEMMMatrixAccumulateBiasesKernel / @ref arm_compute::NEFullyConnectedLayer + - @ref arm_compute::NEGEMMLowpMatrixMultiplyKernel / @ref arm_compute::NEGEMMLowp + +v17.03 Sources preview + - New OpenCL kernels / functions: + - @ref arm_compute::CLGradientKernel, @ref arm_compute::CLEdgeNonMaxSuppressionKernel, @ref arm_compute::CLEdgeTraceKernel / @ref arm_compute::CLCannyEdge + - GEMM refactoring + FP16 support: @ref arm_compute::CLGEMMInterleave4x4Kernel, @ref arm_compute::CLGEMMTranspose1xWKernel, @ref arm_compute::CLGEMMMatrixMultiplyKernel, @ref arm_compute::CLGEMMMatrixAdditionKernel / @ref arm_compute::CLGEMM + - @ref arm_compute::CLGEMMMatrixAccumulateBiasesKernel / @ref arm_compute::CLFullyConnectedLayer + - @ref arm_compute::CLTransposeKernel / @ref arm_compute::CLTranspose + - @ref arm_compute::CLLKTrackerInitKernel, @ref arm_compute::CLLKTrackerStage0Kernel, @ref arm_compute::CLLKTrackerStage1Kernel, @ref arm_compute::CLLKTrackerFinalizeKernel / @ref arm_compute::CLOpticalFlow + - @ref arm_compute::CLNormalizationLayerKernel / @ref arm_compute::CLNormalizationLayer + - @ref arm_compute::CLLaplacianPyramid, @ref arm_compute::CLLaplacianReconstruct + - New NEON kernels / functions: + - @ref arm_compute::NEActivationLayerKernel / @ref arm_compute::NEActivationLayer + - GEMM refactoring + FP16 support 
(Requires armv8.2 CPU): @ref arm_compute::NEGEMMInterleave4x4Kernel, @ref arm_compute::NEGEMMTranspose1xWKernel, @ref arm_compute::NEGEMMMatrixMultiplyKernel, @ref arm_compute::NEGEMMMatrixAdditionKernel / @ref arm_compute::NEGEMM + - @ref arm_compute::NEPoolingLayerKernel / @ref arm_compute::NEPoolingLayer + +v17.02.1 Sources preview + - New OpenCL kernels / functions: + - @ref arm_compute::CLLogits1DMaxKernel, @ref arm_compute::CLLogits1DShiftExpSumKernel, @ref arm_compute::CLLogits1DNormKernel / @ref arm_compute::CLSoftmaxLayer + - @ref arm_compute::CLPoolingLayerKernel / @ref arm_compute::CLPoolingLayer + - @ref arm_compute::CLIm2ColKernel, @ref arm_compute::CLCol2ImKernel, @ref arm_compute::CLConvolutionLayerWeightsReshapeKernel / @ref arm_compute::CLConvolutionLayer + - @ref arm_compute::CLRemapKernel / @ref arm_compute::CLRemap + - @ref arm_compute::CLGaussianPyramidHorKernel, @ref arm_compute::CLGaussianPyramidVertKernel / @ref arm_compute::CLGaussianPyramid, @ref arm_compute::CLGaussianPyramidHalf, @ref arm_compute::CLGaussianPyramidOrb + - @ref arm_compute::CLMinMaxKernel, @ref arm_compute::CLMinMaxLocationKernel / @ref arm_compute::CLMinMaxLocation + - @ref arm_compute::CLNonLinearFilterKernel / @ref arm_compute::CLNonLinearFilter + - New NEON FP16 kernels (Requires armv8.2 CPU) + - @ref arm_compute::NEAccumulateWeightedFP16Kernel + - @ref arm_compute::NEBox3x3FP16Kernel + - @ref arm_compute::NENonMaximaSuppression3x3FP16Kernel + +v17.02 Sources preview + - New OpenCL kernels / functions: + - @ref arm_compute::CLActivationLayerKernel / @ref arm_compute::CLActivationLayer + - @ref arm_compute::CLChannelCombineKernel / @ref arm_compute::CLChannelCombine + - @ref arm_compute::CLDerivativeKernel / @ref arm_compute::CLChannelExtract + - @ref arm_compute::CLFastCornersKernel / @ref arm_compute::CLFastCorners + - @ref arm_compute::CLMeanStdDevKernel / @ref arm_compute::CLMeanStdDev + - New NEON kernels / functions: + - HOG / SVM: @ref 
arm_compute::NEHOGOrientationBinningKernel, @ref arm_compute::NEHOGBlockNormalizationKernel, @ref arm_compute::NEHOGDetectorKernel, arm_compute::NEHOGNonMaximaSuppressionKernel / @ref arm_compute::NEHOGDescriptor, @ref arm_compute::NEHOGDetector, @ref arm_compute::NEHOGGradient, @ref arm_compute::NEHOGMultiDetection + - @ref arm_compute::NENonLinearFilterKernel / @ref arm_compute::NENonLinearFilter + - Introduced a CLScheduler to manage the default context and command queue used by the runtime library and create synchronisation events. + - Switched all the kernels / functions to use tensors instead of images. + - Updated documentation to include instructions to build the library from sources. + +v16.12 Binary preview release + - Original release + +@section S3_how_to_build How to build the library and the examples + +@subsection S3_1_build_options Build options + +scons 2.3 or above is required to build the library. +To see the build options available simply run ```scons -h```: + + debug: Debug (default=0) (0|1) + default: 0 + actual: 0 + + asserts: Enable asserts (This flag is forced to 1 for debug=1) (default=0) (0|1) + default: 0 + actual: 0 + + arch: Target Architecture (default=armv7a) (armv7a|arm64-v8a|arm64-v8.2-a|x86_32|x86_64) + default: armv7a + actual: armv7a + + os: Target OS (default=linux) (linux|android|bare_metal) + default: linux + actual: linux + + build: Build type: (default=cross_compile) (native|cross_compile) + default: cross_compile + actual: cross_compile + + Werror: Enable/disable the -Werror compilation flag (Default=1) (0|1) + default: 1 + actual: 1 + + opencl: Enable OpenCL support(Default=1) (0|1) + default: 1 + actual: 1 + + neon: Enable Neon support(Default=0) (0|1) + default: 0 + actual: 0 + + embed_kernels: Embed OpenCL kernels in library binary(Default=0) (0|1) + default: 0 + actual: 0 + + scheduler: Scheduler backend(Default=cpp) (cpp|pthread|openmp) + default: cpp + actual: cpp + + set_soname: Set the library's soname and 
- With debug=0 and asserts=0: All optimisations are enabled and no validation is performed; if the application misuses the library it is likely to result in a crash. (Only use this mode once you are sure your application is working as expected).
@note This option is disabled by default as it requires SCons version 2.4 or above.
+ +For Linux, the library was successfully built and tested using the following Linaro GCC toolchain: + + - gcc-linaro-arm-linux-gnueabihf-4.9-2014.07_linux + - gcc-linaro-4.9-2016.02-x86_64_aarch64-linux-gnu + - gcc-linaro-6.3.1-2017.02-i686_aarch64-linux-gnu + +@note If you are building with opencl=1 then scons will expect to find libOpenCL.so either in the current directory or in "build" (See the section below if you need a stub OpenCL library to link against) + +To cross-compile the library in debug mode, with NEON only support, for Linux 32bit: + + scons Werror=1 -j8 debug=1 neon=1 opencl=0 os=linux arch=armv7a + +To cross-compile the library in asserts mode, with OpenCL only support, for Linux 64bit: + + scons Werror=1 -j8 debug=0 asserts=1 neon=0 opencl=1 embed_kernels=1 os=linux arch=arm64-v8a + +You can also compile the library natively on an ARM device by using build=native: + + scons Werror=1 -j8 debug=0 neon=1 opencl=0 os=linux arch=arm64-v8a build=native + scons Werror=1 -j8 debug=0 neon=1 opencl=0 os=linux arch=armv7a build=native + +@note g++ for ARM is mono-arch, therefore if you want to compile for Linux 32bit on a Linux 64bit platform you will have to use a cross compiler. + +For example on a 64bit Debian based system you would have to install g++-arm-linux-gnueabihf + + apt-get install g++-arm-linux-gnueabihf + +Then run + + scons Werror=1 -j8 debug=0 neon=1 opencl=0 os=linux arch=armv7a build=cross_compile + +or simply remove the build parameter as build=cross_compile is the default value: + + scons Werror=1 -j8 debug=0 neon=1 opencl=0 os=linux arch=armv7a + +@attention To cross compile with opencl=1 you need to make sure to have a version of libOpenCL matching your target architecture. + +@subsubsection S3_2_2_examples How to manually build the examples ? + +The examples get automatically built by scons as part of the build process of the library described above. 
This section just describes how you can build and link your own application against our library. + +@note The following command lines assume the arm_compute and libOpenCL binaries are present in the current directory or in the system library path. If this is not the case you can specify the location of the pre-built library with the compiler option -L. When building the OpenCL example the commands below assume that the CL headers are located in the include folder where the command is executed. + +To cross compile a NEON example for Linux 32bit: + + arm-linux-gnueabihf-g++ examples/neon_convolution.cpp utils/Utils.cpp -I. -std=c++11 -mfpu=neon -L. -larm_compute -o neon_convolution + +To cross compile a NEON example for Linux 64bit: + + aarch64-linux-gnu-g++ examples/neon_convolution.cpp utils/Utils.cpp -I. -std=c++11 -L. -larm_compute -o neon_convolution + +(notice the only difference with the 32 bit command is that we don't need the -mfpu option and the compiler's name is different) + +To cross compile an OpenCL example for Linux 32bit: + + arm-linux-gnueabihf-g++ examples/cl_convolution.cpp utils/Utils.cpp -I. -Iinclude -std=c++11 -mfpu=neon -L. -larm_compute -lOpenCL -o cl_convolution + +To cross compile an OpenCL example for Linux 64bit: + + aarch64-linux-gnu-g++ examples/cl_convolution.cpp utils/Utils.cpp -I. -Iinclude -std=c++11 -L. -larm_compute -lOpenCL -o cl_convolution + +(notice the only difference with the 32 bit command is that we don't need the -mfpu option and the compiler's name is different) + +To compile natively (i.e directly on an ARM device) for NEON for Linux 32bit: + + g++ examples/neon_convolution.cpp utils/Utils.cpp -I. -std=c++11 -mfpu=neon -larm_compute -o neon_convolution + +To compile natively (i.e directly on an ARM device) for NEON for Linux 64bit: + + g++ examples/neon_convolution.cpp utils/Utils.cpp -I. 
-std=c++11 -larm_compute -o neon_convolution + +(notice the only difference with the 32 bit command is that we don't need the -mfpu option) + +To compile natively (i.e directly on an ARM device) for OpenCL for Linux 32bit or Linux 64bit: + + g++ examples/cl_convolution.cpp utils/Utils.cpp -I. -Iinclude -std=c++11 -larm_compute -lOpenCL -o cl_convolution + + +@note These two commands assume libarm_compute.so is available in your library path, if not add the path to it using -L + +To run the built executable simply run: + + LD_LIBRARY_PATH=build ./neon_convolution + +or + + LD_LIBRARY_PATH=build ./cl_convolution + +@note If you built the library with support for both OpenCL and NEON you will need to link against OpenCL even if your application only uses NEON. + +@subsection S3_3_android Android + +For Android, the library was successfully built and tested using Google's standalone toolchains: + - arm-linux-androideabi-4.9 for armv7a (clang++) + - aarch64-linux-android-4.9 for arm64-v8a (g++) + +Here is a guide to create your Android standalone toolchains from the NDK + +- Download the NDK r14 from here: https://developer.android.com/ndk/downloads/index.html +- Make sure you have Python 2 installed on your machine. +- Generate the 32 and/or 64 toolchains by running the following commands: + + + $NDK/build/tools/make_standalone_toolchain.py --arch arm64 --install-dir $MY_TOOLCHAINS/aarch64-linux-android-4.9 --stl gnustl + $NDK/build/tools/make_standalone_toolchain.py --arch arm --install-dir $MY_TOOLCHAINS/arm-linux-androideabi-4.9 --stl gnustl + +@attention Due to some NDK issues make sure you use g++ & gnustl for aarch64 and clang++ & gnustl for armv7 + +@note Make sure to add the toolchains to your PATH: export PATH=$PATH:$MY_TOOLCHAINS/aarch64-linux-android-4.9/bin:$MY_TOOLCHAINS/arm-linux-androideabi-4.9/bin + +@subsubsection S3_3_1_library How to build the library ? 
+ +@note If you are building with opencl=1 then scons will expect to find libOpenCL.so either in the current directory or in "build" (See the section below if you need a stub OpenCL library to link against) + +To cross-compile the library in debug mode, with NEON only support, for Android 32bit: + + CXX=clang++ CC=clang scons Werror=1 -j8 debug=1 neon=1 opencl=0 os=android arch=armv7a + +To cross-compile the library in asserts mode, with OpenCL only support, for Android 64bit: + + scons Werror=1 -j8 debug=0 asserts=1 neon=0 opencl=1 embed_kernels=1 os=android arch=arm64-v8a + +@subsubsection S3_3_2_examples How to manually build the examples ? + +The examples get automatically built by scons as part of the build process of the library described above. This section just describes how you can build and link your own application against our library. + +@note The following command lines assume the arm_compute binaries are present in the current directory or in the system library path. + +Once you've got your Android standalone toolchain built and added to your path you can do the following: + +To cross compile a NEON example: + + #32 bit: + arm-linux-androideabi-clang++ examples/neon_convolution.cpp -I. -Iinclude -std=c++11 -larm_compute-static -L. -o neon_convolution_arm -static-libstdc++ -pie + #64 bit: + aarch64-linux-android-g++ examples/neon_convolution.cpp -I. -Iinclude -std=c++11 -larm_compute-static -L. -o neon_convolution_aarch64 -static-libstdc++ -pie + +To cross compile an OpenCL example: + + #32 bit: + arm-linux-androideabi-clang++ examples/cl_convolution.cpp -I. -Iinclude -std=c++11 -larm_compute-static -L. -o cl_convolution_arm -static-libstdc++ -pie -lOpenCL + #64 bit: + aarch64-linux-android-g++ examples/cl_convolution.cpp -I. -Iinclude -std=c++11 -larm_compute-static -L. 
Then all you need to do is upload the executable and the shared library to the device using ADB:
To cross-compile the stub OpenCL library simply run:
+
+    <target-prefix>-gcc -o libOpenCL.so -Iinclude opencl-1.2-stubs/opencl_stubs.c -fPIC -shared
+
+For example:
+
+    <target-prefix>-gcc -o libOpenCL.so -Iinclude opencl-1.2-stubs/opencl_stubs.c -fPIC -shared
+ +For maximum performance, it is expected that the users would re-implement an equivalent to the runtime library which suits better their needs (With a more clever multi-threading strategy, load-balancing between NEON and OpenCL, etc.) + +@section S4_2_windows_kernels_mt_functions Windows, kernels, multi-threading and functions + +@subsection S4_2_1_windows Windows + +A @ref Window represents a workload to execute, it can handle up to @ref Coordinates::num_max_dimensions dimensions. +Each dimension is defined by a start, end and step. + +It can split into subwindows as long as *all* the following rules remain true for all the dimensions: + +- max[n].start() <= sub[n].start() < max[n].end() +- sub[n].start() < sub[n].end() <= max[n].end() +- max[n].step() == sub[n].step() +- (sub[n].start() - max[n].start()) % max[n].step() == 0 +- (sub[n].end() - sub[n].start()) % max[n].step() == 0 + +@subsection S4_2_2 Kernels + +Each implementation of the @ref IKernel interface (base class of all the kernels in the core library) works in the same way: + +OpenCL kernels: + +@code{.cpp} +// Initialize the CLScheduler with the default context and default command queue +// Implicitly initializes the CLKernelLibrary to use ./cl_kernels as location for OpenCL kernels files and sets a default device for which OpenCL programs are built. 
+CLScheduler::get().default_init(); + +cl::CommandQueue q = CLScheduler::get().queue(); +//Create a kernel object: +MyKernel kernel; +// Initialize the kernel with the input/output and options you want to use: +kernel.configure( input, output, option0, option1); +// Retrieve the execution window of the kernel: +const Window& max_window = kernel.window(); +// Run the whole kernel in the current thread: +kernel.run( q, max_window ); // Enqueue the kernel to process the full window on the default queue + +// Wait for the processing to complete: +q.finish(); +@endcode + +NEON / CPP kernels: + +@code{.cpp} +//Create a kernel object: +MyKernel kernel; +// Initialize the kernel with the input/output and options you want to use: +kernel.configure( input, output, option0, option1); +// Retrieve the execution window of the kernel: +const Window& max_window = kernel.window(); +// Run the whole kernel in the current thread: +kernel.run( max_window ); // Run the kernel on the full window +@endcode + +@subsection S4_2_3 Multi-threading + +The previous section shows how to run a NEON / CPP kernel in the current thread, however if your system has several CPU cores, you will probably want the kernel to use several cores. Here is how this can be done: + +@snippet src/runtime/CPP/CPPScheduler.cpp Scheduler example + +This is the very basic implementation used in the NEON runtime library by all the NEON functions. + +@sa CPPScheduler. + +@note Some kernels like for example @ref NEHistogramKernel need some local temporary buffer to perform their calculations. In order to avoid memory corruption between threads, the local buffer must be of size: ```memory_needed_per_thread * num_threads``` and each subwindow must be initialized by calling @ref Window::set_thread_id() with a unique thread_id between 0 and num_threads. 
+ +@subsection S4_2_4 Functions + +Functions will automatically allocate the temporary buffers mentioned above, and will automatically multi-thread kernels' executions using the very basic scheduler described in the previous section. + +Simple functions only call a single kernel (e.g @ref NEConvolution3x3), while more complex ones consist of several kernels pipelined together (e.g @ref NEGaussianPyramid, @ref NEHarrisCorners). Check their documentation to find out which kernels are used by each function. + +@code{.cpp} +//Create a function object: +MyFunction function; +// Initialize the function with the input/output and options you want to use: +function.configure( input, output, option0, option1); +// Execute the function: +function.run(); +@endcode + +@warning The Compute Library requires Mali OpenCL DDK r8p0 or higher (OpenCL kernels are compiled using the -cl-arm-non-uniform-work-group-size flag) + +@note All OpenCL functions and objects in the runtime library use the command queue associated with CLScheduler for all operations, a real implementation would be expected to use different queues for mapping operations and kernels in order to reach a better GPU utilization. + +@subsection S4_4_1_cl_scheduler OpenCL Scheduler and kernel library + +The Compute Library runtime uses a single command queue and context for all the operations. + +The user can get / set this context and command queue through CLScheduler's interface. + +The user can get / set the target GPU device through the CLScheduler's interface. + +@attention Make sure the application is using the same context as the library as in OpenCL it is forbidden to share objects across contexts. This is done by calling @ref CLScheduler::init() or @ref CLScheduler::default_init() at the beginning of your application. + +@attention Make sure the scheduler's target is not changed after function classes are created. + +All OpenCL kernels used by the library are built and stored in @ref CLKernelLibrary. 
Several algorithms require a neighborhood around the current pixel to compute its value. This means the algorithm will not be able to process the borders of the image unless you give it more information about how those border pixels should be processed.
See also @ref valid_region.
+ +@code{.cpp} +Image src, dst; + +// Use auto padding for the input: +src.info()->init_auto_padding(TensorShape(640u,480u), Format::U8); + +// Use manual padding for the destination image +dst.info()->init(src.info()->tensor_shape(), Format::U8, strides_in_bytes, offset_first_element_in_bytes, total_size_in_bytes); + +// Allocate all the images +src.allocator()->allocate(); +dst.allocator()->allocate(); +// Fill the input image with the content of the PPM image if a filename was provided: +fill_image(src); + +NEGaussian3x3 gauss; + +// Apply a Gaussian 3x3 filter to the source image (Note: if the padding provided is not enough then the execution window and valid region of the output will be shrunk) +gauss.configure(&src, &dst, BorderMode::UNDEFINED); + +//Execute the functions: +gauss.run(); +@endcode + +@warning Some kernels need up to 3 neighbor values to calculate the value of a given pixel. Therefore, to be safe, we use a 4-pixel padding all around the image. In addition, some kernels read and write up to 32 pixels at the same time. To cover that case as well we add an extra 32 pixels of padding at the end of each row. As a result auto padded buffers waste a lot of memory and are less cache friendly. It is therefore recommended to use accurate padding or manual padding wherever possible. + +@subsubsection valid_region Valid regions + +Some kernels (like edge detectors for example) need to read values of neighboring pixels to calculate the value of a given pixel, it is therefore not possible to calculate the values of the pixels on the edges. + +Another case is: if a kernel processes 8 pixels per iteration and the image's dimensions are not a multiple of 8 and not enough padding is available then the kernel will not be able to process the pixels near the right edge. As a result these pixels will be left undefined. + +In order to know which pixels have been calculated, each kernel sets a valid region for each output image or tensor. 
Depending on the number of dimensions tensors can be interpreted as various objects. A scalar can be represented as a zero-dimensional tensor and a vector of numbers can be represented as a one-dimensional tensor. Further, an image is actually just a 2D tensor, a 3D tensor can be seen as an array of images and a 4D tensor as a 2D array of images, etc.
+ +@attention Regardless of the @ref DataType used by a tensor the @ref ITensor::buffer() method will always return a uint8_t pointer, and all the metadata in @ref TensorInfo will be expressed in bytes. It is the user's responsibility to cast the pointer to the correct type. + +For example, to read the element located at the coordinates (x,y) of a float tensor: + +@code{.cpp} +float value = *reinterpret_cast<float *>(input.buffer() + input.info()->offset_element_in_bytes(Coordinates(x,y))); +@endcode + +@subsection S4_6_4_working_with_objects Working with Images and Tensors using iterators + +The library provides some iterators to access objects' data. +Iterators are created by associating a data object (An image or a tensor for example) with an iteration window. + +Iteration windows are defined by an array of dimensions, each of which consists of a start, end and step. + +The @ref execute_window_loop function takes an execution window, a lambda function and one or more iterators. +It will iterate through every element of the execution window and for each element it will update the iterators accordingly and call the lambda function. + +Here are a couple of examples of how to use the iterators to fill / read tensors: + +@snippet examples/neon_copy_objects.cpp Copy objects example +*/ +} // namespace arm_compute diff --git a/docs/02_tests.dox b/docs/02_tests.dox new file mode 100644 index 0000000000..fd5bc59194 --- /dev/null +++ b/docs/02_tests.dox @@ -0,0 +1,93 @@ +/** +@page tests Test architecture + +@tableofcontents + +@section building_test_dependencies Building dependencies + +The tests currently make use of Boost (Test and Program options) for validation +and Google Benchmark for performance runs. Below are instructions on how to +build these 3rd party libraries. 
+ +@subsection building_boost Building Boost + +First follow the instructions from the Boost library on how to set up the Boost +build system +(http://www.boost.org/doc/libs/1_64_0/more/getting_started/index.html). +Afterwards the required libraries can be built with: + + ./b2 --with-program_options --with-test link=static \ + define=BOOST_TEST_ALTERNATIVE_INIT_API + +Additionally, depending on your environment, it might be necessary to specify +the ```toolset=``` option to choose the right compiler. Moreover, +```address-model=32``` can be used to force building for 32bit and +```target-os=android``` must be specified to build for Android. + +After executing the build command the libraries +```libboost_program_options.a``` and ```libboost_unit_test_framework.a``` can +be found in ```./stage/lib```. + +@subsection building_google_benchmark Building Google Benchmark + +Instructions on how to build Google Benchmark using CMake can be found in their +repository: https://github.com/google/benchmark. For example, building for +Android 32bit can be achieved via + + cmake -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_CXX_COMPILER=arm-linux-androideabi-clang++ \ + -DBENCHMARK_ENABLE_LTO=false -DBENCHMARK_ENABLE_TESTING=false .. + +The library required by the compute library is ```libbenchmark.a```. + +@section tests_running_tests Running tests +@subsection tests_running_tests_benchmarking Benchmarking +@subsubsection tests_running_tests_benchmarking_filter Filter tests +All tests can be run by invoking + + ./arm_compute_benchmark -- ./data + +where `./data` contains the assets needed by the tests. + +If only a subset of the tests has to be executed the `--benchmark_filter` option takes a regular expression to select matching tests. + + ./arm_compute_benchmark --benchmark_filter=neon_bitwise_and ./data + +All available tests can be displayed with the `--benchmark_list_tests` switch. 
+ + ./arm_compute_benchmark --benchmark_list_tests ./data + +@subsubsection tests_running_tests_benchmarking_runtime Runtime +By default every test is run multiple *iterations* until a minimum time is reached. The minimum time (in seconds) can be controlled with the `--benchmark_min_time` flag. However, each test might have a hard coded value for the number of iterations or minimum execution time. In that case the command line argument is ignored for those specific tests. +Additionally it is possible to specify multiple *repetitions* (`--benchmark_repetitions`) which will run each test multiple times (including the iterations). The average and standard deviation for all repetitions is automatically computed and reported. + +@subsubsection tests_running_tests_benchmarking_verbosity Verbosity +The verbosity of the test output can be controlled via the `--v` flag. Though it should hardly ever be necessary. + +@subsection tests_running_tests_validation Validation +@subsubsection tests_running_tests_validation_filter Filter tests +All tests can be run by invoking + + ./arm_compute_validation -- ./data + +where `./data` contains the assets needed by the tests. + +As running all tests can take a lot of time the suite is split into "precommit" and "nightly" tests. The precommit tests will be fast to execute but still cover the most important features. In contrast the nightly tests offer more extensive coverage but take longer. The different subsets can be selected from the command line as follows: + + ./arm_compute_validation -t @precommit -- ./data + ./arm_compute_validation -t @nightly -- ./data + +Additionally it is possible to select specific suites or tests: + + ./arm_compute_validation -t CL -- ./data + ./arm_compute_validation -t NEON/BitwiseAnd/RunSmall/_0 -- ./data + +All available tests can be displayed with the `--list_content` switch. 
+ + ./arm_compute_validation --list_content -- ./data + +For a complete list of possible selectors please see: http://www.boost.org/doc/libs/1_64_0/libs/test/doc/html/boost_test/runtime_config/test_unit_filtering.html + +@subsubsection tests_running_tests_validation_verbosity Verbosity +There are two separate flags to control the verbosity of the test output. `--report_level` controls the verbosity of the summary produced after all tests have been executed. `--log_level` controls the verbosity of the information generated during the execution of tests. All available settings can be found in the Boost documentation for [--report_level](http://www.boost.org/doc/libs/1_64_0/libs/test/doc/html/boost_test/utf_reference/rt_param_reference/report_level.html) and [--log_level](http://www.boost.org/doc/libs/1_64_0/libs/test/doc/html/boost_test/utf_reference/rt_param_reference/log_level.html), respectively. +*/ diff --git a/docs/Doxyfile b/docs/Doxyfile new file mode 100644 index 0000000000..e70766b916 --- /dev/null +++ b/docs/Doxyfile @@ -0,0 +1,2458 @@ +# Doxyfile 1.8.9.1 + +# This file describes the settings to be used by the documentation system +# doxygen (www.doxygen.org) for a project. +# +# All text after a double hash (##) is considered a comment and is placed in +# front of the TAG it is preceding. +# +# All text after a single hash (#) is considered a comment and will be ignored. +# The format is: +# TAG = value [value, ...] +# For lists, items can also be appended using: +# TAG += value [value, ...] +# Values that contain spaces should be placed between quotes (\" \"). + +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- + +# This tag specifies the encoding used for all characters in the config file +# that follow. 
The default is UTF-8 which is also the encoding used for all text +# before the first occurrence of this tag. Doxygen uses libiconv (or the iconv +# built into libc) for the transcoding. See http://www.gnu.org/software/libiconv +# for the list of possible encodings. +# The default value is: UTF-8. + +DOXYFILE_ENCODING = UTF-8 + +# The PROJECT_NAME tag is a single word (or a sequence of words surrounded by +# double-quotes, unless you are using Doxywizard) that should identify the +# project for which the documentation is generated. This name is used in the +# title of most generated pages and in a few other places. +# The default value is: My Project. + +PROJECT_NAME = "Compute Library" + +# The PROJECT_NUMBER tag can be used to enter a project or revision number. This +# could be handy for archiving the generated documentation or if some version +# control system is used. + +PROJECT_NUMBER = 0.0 + +# Using the PROJECT_BRIEF tag one can provide an optional one line description +# for a project that appears at the top of each page and should give viewer a +# quick idea about the purpose of the project. Keep the description short. + +PROJECT_BRIEF = + +# With the PROJECT_LOGO tag one can specify a logo or an icon that is included +# in the documentation. The maximum height of the logo should not exceed 55 +# pixels and the maximum width should not exceed 200 pixels. Doxygen will copy +# the logo to the output directory. + +PROJECT_LOGO = + +# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path +# into which the generated documentation will be written. If a relative path is +# entered, it will be relative to the location where doxygen was started. If +# left blank the current directory will be used. 
+ +OUTPUT_DIRECTORY = build/docs/ + +# If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub- +# directories (in 2 levels) under the output directory of each output format and +# will distribute the generated files over these directories. Enabling this +# option can be useful when feeding doxygen a huge amount of source files, where +# putting all generated files in the same directory would otherwise causes +# performance problems for the file system. +# The default value is: NO. + +CREATE_SUBDIRS = NO + +# If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII +# characters to appear in the names of generated files. If set to NO, non-ASCII +# characters will be escaped, for example _xE3_x81_x84 will be used for Unicode +# U+3044. +# The default value is: NO. + +#ALLOW_UNICODE_NAMES = NO + +# The OUTPUT_LANGUAGE tag is used to specify the language in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all constant output in the proper language. +# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese, +# Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States), +# Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian, +# Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages), +# Korean, Korean-en (Korean with English messages), Latvian, Lithuanian, +# Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian, +# Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish, +# Ukrainian and Vietnamese. +# The default value is: English. + +OUTPUT_LANGUAGE = English + +# If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member +# descriptions after the members that are listed in the file and class +# documentation (similar to Javadoc). Set to NO to disable this. +# The default value is: YES. 
+ +BRIEF_MEMBER_DESC = YES + +# If the REPEAT_BRIEF tag is set to YES, doxygen will prepend the brief +# description of a member or function before the detailed description +# +# Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the +# brief descriptions will be completely suppressed. +# The default value is: YES. + +REPEAT_BRIEF = YES + +# This tag implements a quasi-intelligent brief description abbreviator that is +# used to form the text in various listings. Each string in this list, if found +# as the leading text of the brief description, will be stripped from the text +# and the result, after processing the whole list, is used as the annotated +# text. Otherwise, the brief description is used as-is. If left blank, the +# following values are used ($name is automatically replaced with the name of +# the entity):The $name class, The $name widget, The $name file, is, provides, +# specifies, contains, represents, a, an and the. + +ABBREVIATE_BRIEF = "The $name class" \ + "The $name widget" \ + "The $name file" \ + is \ + provides \ + specifies \ + contains \ + represents \ + a \ + an \ + the + +# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then +# doxygen will generate a detailed section even if there is only a brief +# description. +# The default value is: NO. + +ALWAYS_DETAILED_SEC = NO + +# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all +# inherited members of a class in the documentation of that class as if those +# members were ordinary class members. Constructors, destructors and assignment +# operators of the base classes will not be shown. +# The default value is: NO. + +INLINE_INHERITED_MEMB = NO + +# If the FULL_PATH_NAMES tag is set to YES, doxygen will prepend the full path +# before files name in the file list and in the header files. If set to NO the +# shortest path that makes the file name unique will be used +# The default value is: YES. 
+ +FULL_PATH_NAMES = YES + +# The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path. +# Stripping is only done if one of the specified strings matches the left-hand +# part of the path. The tag can be used to show relative paths in the file list. +# If left blank the directory from which doxygen is run is used as the path to +# strip. +# +# Note that you can specify absolute paths here, but also relative paths, which +# will be relative from the directory where doxygen is started. +# This tag requires that the tag FULL_PATH_NAMES is set to YES. + +#STRIP_FROM_PATH = arm_compute/ + +# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the +# path mentioned in the documentation of a class, which tells the reader which +# header file to include in order to use a class. If left blank only the name of +# the header file containing the class definition is used. Otherwise one should +# specify the list of include paths that are normally passed to the compiler +# using the -I flag. + +STRIP_FROM_INC_PATH = + +# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but +# less readable) file names. This can be useful is your file systems doesn't +# support long names like on DOS, Mac, or CD-ROM. +# The default value is: NO. + +SHORT_NAMES = NO + +# If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the +# first line (until the first dot) of a Javadoc-style comment as the brief +# description. If set to NO, the Javadoc-style will behave just like regular Qt- +# style comments (thus requiring an explicit @brief command for a brief +# description.) +# The default value is: NO. + +JAVADOC_AUTOBRIEF = YES + +# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first +# line (until the first dot) of a Qt-style comment as the brief description. 
If +# set to NO, the Qt-style will behave just like regular Qt-style comments (thus +# requiring an explicit \brief command for a brief description.) +# The default value is: NO. + +QT_AUTOBRIEF = NO + +# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a +# multi-line C++ special comment block (i.e. a block of //! or /// comments) as +# a brief description. This used to be the default behavior. The new default is +# to treat a multi-line C++ comment block as a detailed description. Set this +# tag to YES if you prefer the old behavior instead. +# +# Note that setting this tag to YES also means that rational rose comments are +# not recognized any more. +# The default value is: NO. + +MULTILINE_CPP_IS_BRIEF = NO + +# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the +# documentation from any documented member that it re-implements. +# The default value is: YES. + +INHERIT_DOCS = YES + +# If the SEPARATE_MEMBER_PAGES tag is set to YES then doxygen will produce a new +# page for each member. If set to NO, the documentation of a member will be part +# of the file/class/namespace that contains it. +# The default value is: NO. + +SEPARATE_MEMBER_PAGES = NO + +# The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen +# uses this value to replace tabs by spaces in code fragments. +# Minimum value: 1, maximum value: 16, default value: 4. + +TAB_SIZE = 4 + +# This tag can be used to specify a number of aliases that act as commands in +# the documentation. An alias has the form: +# name=value +# For example adding +# "sideeffect=@par Side Effects:\n" +# will allow you to put the command \sideeffect (or @sideeffect) in the +# documentation, which will result in a user-defined paragraph with heading +# "Side Effects:". You can put \n's in the value part of an alias to insert +# newlines. + +ALIASES = + +# This tag can be used to specify a number of word-keyword mappings (TCL only). 
+# A mapping has the form "name=value". For example adding "class=itcl::class" +# will allow you to use the command class in the itcl::class meaning. + +TCL_SUBST = + +# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources +# only. Doxygen will then generate output that is more tailored for C. For +# instance, some of the names that are used will be different. The list of all +# members will be omitted, etc. +# The default value is: NO. + +OPTIMIZE_OUTPUT_FOR_C = YES + +# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or +# Python sources only. Doxygen will then generate output that is more tailored +# for that language. For instance, namespaces will be presented as packages, +# qualified scopes will look different, etc. +# The default value is: NO. + +OPTIMIZE_OUTPUT_JAVA = NO + +# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran +# sources. Doxygen will then generate output that is tailored for Fortran. +# The default value is: NO. + +OPTIMIZE_FOR_FORTRAN = NO + +# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL +# sources. Doxygen will then generate output that is tailored for VHDL. +# The default value is: NO. + +OPTIMIZE_OUTPUT_VHDL = NO + +# Doxygen selects the parser to use depending on the extension of the files it +# parses. With this tag you can assign which parser to use for a given +# extension. Doxygen has a built-in mapping, but you can override or extend it +# using this tag. The format is ext=language, where ext is a file extension, and +# language is one of the parsers supported by doxygen: IDL, Java, Javascript, +# C#, C, C++, D, PHP, Objective-C, Python, Fortran (fixed format Fortran: +# FortranFixed, free formatted Fortran: FortranFree, unknown formatted Fortran: +# Fortran. In the later case the parser tries to guess whether the code is fixed +# or free formatted code, this is the default for Fortran type files), VHDL. 
For +# instance to make doxygen treat .inc files as Fortran files (default is PHP), +# and .f files as C (default is Fortran), use: inc=Fortran f=C. +# +# Note: For files without extension you can use no_extension as a placeholder. +# +# Note that for custom extensions you also need to set FILE_PATTERNS otherwise +# the files are not read by doxygen. + +EXTENSION_MAPPING = cl=C + +# If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments +# according to the Markdown format, which allows for more readable +# documentation. See http://daringfireball.net/projects/markdown/ for details. +# The output of markdown processing is further processed by doxygen, so you can +# mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in +# case of backward compatibilities issues. +# The default value is: YES. + +MARKDOWN_SUPPORT = YES + +# When enabled doxygen tries to link words that correspond to documented +# classes, or namespaces to their corresponding documentation. Such a link can +# be prevented in individual cases by putting a % sign in front of the word or +# globally by setting AUTOLINK_SUPPORT to NO. +# The default value is: YES. + +AUTOLINK_SUPPORT = YES + +# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want +# to include (a tag file for) the STL sources as input, then you should set this +# tag to YES in order to let doxygen match functions declarations and +# definitions whose arguments contain STL classes (e.g. func(std::string); +# versus func(std::string) {}). This also make the inheritance and collaboration +# diagrams that involve STL classes more complete and accurate. +# The default value is: NO. + +BUILTIN_STL_SUPPORT = NO + +# If you use Microsoft's C++/CLI language, you should set this option to YES to +# enable parsing support. +# The default value is: NO. 
+ +CPP_CLI_SUPPORT = NO + +# Set the SIP_SUPPORT tag to YES if your project consists of sip (see: +# http://www.riverbankcomputing.co.uk/software/sip/intro) sources only. Doxygen +# will parse them like normal C++ but will assume all classes use public instead +# of private inheritance when no explicit protection keyword is present. +# The default value is: NO. + +SIP_SUPPORT = NO + +# For Microsoft's IDL there are propget and propput attributes to indicate +# getter and setter methods for a property. Setting this option to YES will make +# doxygen to replace the get and set methods by a property in the documentation. +# This will only work if the methods are indeed getting or setting a simple +# type. If this is not the case, or you want to show the methods anyway, you +# should set this option to NO. +# The default value is: YES. + +IDL_PROPERTY_SUPPORT = YES + +# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC +# tag is set to YES then doxygen will reuse the documentation of the first +# member in the group (if any) for the other members of the group. By default +# all members of a group must be documented explicitly. +# The default value is: NO. + +DISTRIBUTE_GROUP_DOC = NO + +# Set the SUBGROUPING tag to YES to allow class member groups of the same type +# (for instance a group of public functions) to be put as a subgroup of that +# type (e.g. under the Public Functions section). Set it to NO to prevent +# subgrouping. Alternatively, this can be done per class using the +# \nosubgrouping command. +# The default value is: YES. + +SUBGROUPING = YES + +# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions +# are shown inside the group in which they are included (e.g. using \ingroup) +# instead of on a separate page (for HTML and Man pages) or section (for LaTeX +# and RTF). +# +# Note that this feature does not work in combination with +# SEPARATE_MEMBER_PAGES. +# The default value is: NO. 
+ +INLINE_GROUPED_CLASSES = NO + +# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions +# with only public data fields or simple typedef fields will be shown inline in +# the documentation of the scope in which they are defined (i.e. file, +# namespace, or group documentation), provided this scope is documented. If set +# to NO, structs, classes, and unions are shown on a separate page (for HTML and +# Man pages) or section (for LaTeX and RTF). +# The default value is: NO. + +INLINE_SIMPLE_STRUCTS = NO + +# When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or +# enum is documented as struct, union, or enum with the name of the typedef. So +# typedef struct TypeS {} TypeT, will appear in the documentation as a struct +# with name TypeT. When disabled the typedef will appear as a member of a file, +# namespace, or class. And the struct will be named TypeS. This can typically be +# useful for C code in case the coding convention dictates that all compound +# types are typedef'ed and only the typedef is referenced, never the tag name. +# The default value is: NO. + +TYPEDEF_HIDES_STRUCT = NO + +# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This +# cache is used to resolve symbols given their name and scope. Since this can be +# an expensive process and often the same symbol appears multiple times in the +# code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small +# doxygen will become slower. If the cache is too large, memory is wasted. The +# cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range +# is 0..9, the default is 0, corresponding to a cache size of 2^16=65536 +# symbols. At the end of a run doxygen will report the cache usage and suggest +# the optimal cache size from a speed point of view. +# Minimum value: 0, maximum value: 9, default value: 0. 
+ +LOOKUP_CACHE_SIZE = 0 + +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- + +# If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in +# documentation are documented, even if no documentation was available. Private +# class members and static file members will be hidden unless the +# EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES. +# Note: This will also disable the warnings about undocumented members that are +# normally produced when WARNINGS is set to YES. +# The default value is: NO. + +EXTRACT_ALL = YES + +# If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will +# be included in the documentation. +# The default value is: NO. + +EXTRACT_PRIVATE = NO + +# If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal +# scope will be included in the documentation. +# The default value is: NO. + +EXTRACT_PACKAGE = NO + +# If the EXTRACT_STATIC tag is set to YES, all static members of a file will be +# included in the documentation. +# The default value is: NO. + +EXTRACT_STATIC = NO + +# If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined +# locally in source files will be included in the documentation. If set to NO, +# only classes defined in header files are included. Does not have any effect +# for Java sources. +# The default value is: YES. + +EXTRACT_LOCAL_CLASSES = NO + +# This flag is only useful for Objective-C code. If set to YES, local methods, +# which are defined in the implementation section but not in the interface are +# included in the documentation. If set to NO, only methods in the interface are +# included. +# The default value is: NO. 
+ +EXTRACT_LOCAL_METHODS = YES + +# If this flag is set to YES, the members of anonymous namespaces will be +# extracted and appear in the documentation as a namespace called +# 'anonymous_namespace{file}', where file will be replaced with the base name of +# the file that contains the anonymous namespace. By default anonymous namespace +# are hidden. +# The default value is: NO. + +EXTRACT_ANON_NSPACES = NO + +# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all +# undocumented members inside documented classes or files. If set to NO these +# members will be included in the various overviews, but no documentation +# section is generated. This option has no effect if EXTRACT_ALL is enabled. +# The default value is: NO. + +HIDE_UNDOC_MEMBERS = NO + +# If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all +# undocumented classes that are normally visible in the class hierarchy. If set +# to NO, these classes will be included in the various overviews. This option +# has no effect if EXTRACT_ALL is enabled. +# The default value is: NO. + +HIDE_UNDOC_CLASSES = NO + +# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend +# (class|struct|union) declarations. If set to NO, these declarations will be +# included in the documentation. +# The default value is: NO. + +HIDE_FRIEND_COMPOUNDS = NO + +# If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any +# documentation blocks found inside the body of a function. If set to NO, these +# blocks will be appended to the function's detailed documentation block. +# The default value is: NO. + +HIDE_IN_BODY_DOCS = NO + +# The INTERNAL_DOCS tag determines if documentation that is typed after a +# \internal command is included. If the tag is set to NO then the documentation +# will be excluded. Set it to YES to include the internal documentation. +# The default value is: NO. 
+ +INTERNAL_DOCS = NO + +# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file +# names in lower-case letters. If set to YES, upper-case letters are also +# allowed. This is useful if you have classes or files whose names only differ +# in case and if your file system supports case sensitive file names. Windows +# and Mac users are advised to set this option to NO. +# The default value is: system dependent. + +CASE_SENSE_NAMES = NO + +# If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with +# their full class and namespace scopes in the documentation. If set to YES, the +# scope will be hidden. +# The default value is: NO. + +HIDE_SCOPE_NAMES = YES + +# If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will +# append additional text to a page's title, such as Class Reference. If set to +# YES the compound reference will be hidden. +# The default value is: NO. + +#HIDE_COMPOUND_REFERENCE= NO + +# If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of +# the files that are included by a file in the documentation of that file. +# The default value is: YES. + +SHOW_INCLUDE_FILES = YES + +# If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each +# grouped member an include statement to the documentation, telling the reader +# which file to include in order to use the member. +# The default value is: NO. + +#SHOW_GROUPED_MEMB_INC = NO + +# If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include +# files with double quotes in the documentation rather than with sharp brackets. +# The default value is: NO. + +FORCE_LOCAL_INCLUDES = NO + +# If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the +# documentation for inline members. +# The default value is: YES. 
+ +INLINE_INFO = YES + +# If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the +# (detailed) documentation of file and class members alphabetically by member +# name. If set to NO, the members will appear in declaration order. +# The default value is: YES. + +SORT_MEMBER_DOCS = YES + +# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief +# descriptions of file, namespace and class members alphabetically by member +# name. If set to NO, the members will appear in declaration order. Note that +# this will also influence the order of the classes in the class list. +# The default value is: NO. + +SORT_BRIEF_DOCS = NO + +# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the +# (brief and detailed) documentation of class members so that constructors and +# destructors are listed first. If set to NO the constructors will appear in the +# respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS. +# Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief +# member documentation. +# Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting +# detailed member documentation. +# The default value is: NO. + +SORT_MEMBERS_CTORS_1ST = NO + +# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy +# of group names into alphabetical order. If set to NO the group names will +# appear in their defined order. +# The default value is: NO. + +SORT_GROUP_NAMES = NO + +# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by +# fully-qualified names, including namespaces. If set to NO, the class list will +# be sorted only by class name, not including the namespace part. +# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. +# Note: This option applies only to the class list, not to the alphabetical +# list. +# The default value is: NO. 
+ +SORT_BY_SCOPE_NAME = NO + +# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper +# type resolution of all parameters of a function it will reject a match between +# the prototype and the implementation of a member function even if there is +# only one candidate or it is obvious which candidate to choose by doing a +# simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still +# accept a match between prototype and implementation in such cases. +# The default value is: NO. + +STRICT_PROTO_MATCHING = YES + +# The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo +# list. This list is created by putting \todo commands in the documentation. +# The default value is: YES. + +GENERATE_TODOLIST = YES + +# The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test +# list. This list is created by putting \test commands in the documentation. +# The default value is: YES. + +GENERATE_TESTLIST = YES + +# The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug +# list. This list is created by putting \bug commands in the documentation. +# The default value is: YES. + +GENERATE_BUGLIST = YES + +# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO) +# the deprecated list. This list is created by putting \deprecated commands in +# the documentation. +# The default value is: YES. + +GENERATE_DEPRECATEDLIST= YES + +# The ENABLED_SECTIONS tag can be used to enable conditional documentation +# sections, marked by \if ... \endif and \cond +# ... \endcond blocks. + +ENABLED_SECTIONS = + +# The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the +# initial value of a variable or macro / define can have for it to appear in the +# documentation. If the initializer consists of more lines than specified here +# it will be hidden. Use a value of 0 to hide initializers completely. 
The +# appearance of the value of individual variables and macros / defines can be +# controlled using \showinitializer or \hideinitializer command in the +# documentation regardless of this setting. +# Minimum value: 0, maximum value: 10000, default value: 30. + +MAX_INITIALIZER_LINES = 30 + +# Set the SHOW_USED_FILES tag to NO to disable the list of files generated at +# the bottom of the documentation of classes and structs. If set to YES, the +# list will mention the files that were used to generate the documentation. +# The default value is: YES. + +SHOW_USED_FILES = YES + +# Set the SHOW_FILES tag to NO to disable the generation of the Files page. This +# will remove the Files entry from the Quick Index and from the Folder Tree View +# (if specified). +# The default value is: YES. + +SHOW_FILES = YES + +# Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces +# page. This will remove the Namespaces entry from the Quick Index and from the +# Folder Tree View (if specified). +# The default value is: YES. + +SHOW_NAMESPACES = YES + +# The FILE_VERSION_FILTER tag can be used to specify a program or script that +# doxygen should invoke to get the current version for each file (typically from +# the version control system). Doxygen will invoke the program by executing (via +# popen()) the command command input-file, where command is the value of the +# FILE_VERSION_FILTER tag, and input-file is the name of an input file provided +# by doxygen. Whatever the program writes to standard output is used as the file +# version. For an example see the documentation. + +FILE_VERSION_FILTER = + +# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed +# by doxygen. The layout file controls the global structure of the generated +# output files in an output format independent way. To create the layout file +# that represents doxygen's defaults, run doxygen with the -l option. 
You can +# optionally specify a file name after the option, if omitted DoxygenLayout.xml +# will be used as the name of the layout file. +# +# Note that if you run doxygen from a directory containing a file called +# DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE +# tag is left empty. + +LAYOUT_FILE = + +# The CITE_BIB_FILES tag can be used to specify one or more bib files containing +# the reference definitions. This must be a list of .bib files. The .bib +# extension is automatically appended if omitted. This requires the bibtex tool +# to be installed. See also http://en.wikipedia.org/wiki/BibTeX for more info. +# For LaTeX the style of the bibliography can be controlled using +# LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the +# search path. See also \cite for info how to create references. + +CITE_BIB_FILES = + +#--------------------------------------------------------------------------- +# Configuration options related to warning and progress messages +#--------------------------------------------------------------------------- + +# The QUIET tag can be used to turn on/off the messages that are generated to +# standard output by doxygen. If QUIET is set to YES this implies that the +# messages are off. +# The default value is: NO. + +QUIET = NO + +# The WARNINGS tag can be used to turn on/off the warning messages that are +# generated to standard error (stderr) by doxygen. If WARNINGS is set to YES +# this implies that the warnings are on. +# +# Tip: Turn warnings on while writing the documentation. +# The default value is: YES. + +WARNINGS = YES + +# If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate +# warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag +# will automatically be disabled. +# The default value is: YES. 
+ +WARN_IF_UNDOCUMENTED = YES + +# If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for +# potential errors in the documentation, such as not documenting some parameters +# in a documented function, or documenting parameters that don't exist or using +# markup commands wrongly. +# The default value is: YES. + +WARN_IF_DOC_ERROR = YES + +# This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that +# are documented, but have no documentation for their parameters or return +# value. If set to NO, doxygen will only warn about wrong or incomplete +# parameter documentation, but not about the absence of documentation. +# The default value is: NO. + +WARN_NO_PARAMDOC = YES + +# The WARN_FORMAT tag determines the format of the warning messages that doxygen +# can produce. The string should contain the $file, $line, and $text tags, which +# will be replaced by the file and line number from which the warning originated +# and the warning text. Optionally the format may contain $version, which will +# be replaced by the version of the file (if it could be obtained via +# FILE_VERSION_FILTER) +# The default value is: $file:$line: $text. + +WARN_FORMAT = "$file:$line:[DOXY_WARN] $text" + +# The WARN_LOGFILE tag can be used to specify a file to which warning and error +# messages should be written. If left blank the output is written to standard +# error (stderr). + +WARN_LOGFILE = + +#--------------------------------------------------------------------------- +# Configuration options related to the input files +#--------------------------------------------------------------------------- + +# The INPUT tag is used to specify the files and/or directories that contain +# documented source files. You may enter file names like myfile.cpp or +# directories like /usr/src/myproject. Separate the files or directories with +# spaces. +# Note: If this tag is empty the current directory is searched. 
+ +INPUT = ./docs/00_introduction.dox \ + ./docs/01_library.dox \ + ./docs/02_tests.dox \ + ./arm_compute/ \ + ./src/core/CL/cl_kernels/ \ + ./examples/ \ + ./tests/ \ + ./utils/ + +# This tag can be used to specify the character encoding of the source files +# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses +# libiconv (or the iconv built into libc) for the transcoding. See the libiconv +# documentation (see: http://www.gnu.org/software/libiconv) for the list of +# possible encodings. +# The default value is: UTF-8. + +INPUT_ENCODING = UTF-8 + +# If the value of the INPUT tag contains directories, you can use the +# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and +# *.h) to filter out the source-files in the directories. If left blank the +# following patterns are tested:*.c, *.cc, *.cxx, *.cpp, *.c++, *.java, *.ii, +# *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, *.hh, *.hxx, *.hpp, +# *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, *.m, *.markdown, +# *.md, *.mm, *.dox, *.py, *.f90, *.f, *.for, *.tcl, *.vhd, *.vhdl, *.ucf, +# *.qsf, *.as and *.js. + +FILE_PATTERNS = *.c \ + *.cc \ + *.cxx \ + *.cpp \ + *.c++ \ + *.java \ + *.ii \ + *.ixx \ + *.ipp \ + *.i++ \ + *.inl \ + *.idl \ + *.ddl \ + *.odl \ + *.h \ + *.hh \ + *.hxx \ + *.hpp \ + *.h++ \ + *.cs \ + *.d \ + *.php \ + *.php4 \ + *.php5 \ + *.phtml \ + *.inc \ + *.m \ + *.markdown \ + *.md \ + *.mm \ + *.dox \ + *.py \ + *.f90 \ + *.f \ + *.for \ + *.tcl \ + *.vhd \ + *.vhdl \ + *.ucf \ + *.qsf \ + *.as \ + *.js \ + *.cl + +# The RECURSIVE tag can be used to specify whether or not subdirectories should +# be searched for input files as well. +# The default value is: NO. + +RECURSIVE = YES + +# The EXCLUDE tag can be used to specify files and/or directories that should be +# excluded from the INPUT source files. This way you can easily exclude a +# subdirectory from a directory tree whose root is specified with the INPUT tag. 
+# +# Note that relative paths are relative to the directory from which doxygen is +# run. + +EXCLUDE = + +# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or +# directories that are symbolic links (a Unix file system feature) are excluded +# from the input. +# The default value is: NO. + +EXCLUDE_SYMLINKS = NO + +# If the value of the INPUT tag contains directories, you can use the +# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude +# certain files from those directories. +# +# Note that the wildcards are matched against the file with absolute path, so to +# exclude all test directories for example use the pattern */test/* + +EXCLUDE_PATTERNS = + +# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names +# (namespaces, classes, functions, etc.) that should be excluded from the +# output. The symbol name can be a fully qualified name, a word, or if the +# wildcard * is used, a substring. Examples: ANamespace, AClass, +# AClass::ANamespace, ANamespace::*Test +# +# Note that the wildcards are matched against the file with absolute path, so to +# exclude all test directories use the pattern */test/* + +EXCLUDE_SYMBOLS = + +# The EXAMPLE_PATH tag can be used to specify one or more files or directories +# that contain example code fragments that are included (see the \include +# command). + +EXAMPLE_PATH = ./examples/ \ + . \ + ./arm_compute/ + +# "." is Needed by the release script + +# If the value of the EXAMPLE_PATH tag contains directories, you can use the +# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and +# *.h) to filter out the source-files in the directories. If left blank all +# files are included. + +EXAMPLE_PATTERNS = * + +# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be +# searched for input files to be used with the \include or \dontinclude commands +# irrespective of the value of the RECURSIVE tag. +# The default value is: NO. 
+ +EXAMPLE_RECURSIVE = NO + +# The IMAGE_PATH tag can be used to specify one or more files or directories +# that contain images that are to be included in the documentation (see the +# \image command). + +IMAGE_PATH = ./docs/ + +# The INPUT_FILTER tag can be used to specify a program that doxygen should +# invoke to filter for each input file. Doxygen will invoke the filter program +# by executing (via popen()) the command: +# +# <filter> <input-file> +# +# where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the +# name of an input file. Doxygen will then use the output that the filter +# program writes to standard output. If FILTER_PATTERNS is specified, this tag +# will be ignored. +# +# Note that the filter must not add or remove lines; it is applied before the +# code is scanned, but not when the output code is generated. If lines are added +# or removed, the anchors will not be placed correctly. + +INPUT_FILTER = + +# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern +# basis. Doxygen will compare the file name with each pattern and apply the +# filter if there is a match. The filters are a list of the form: pattern=filter +# (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how +# filters are used. If the FILTER_PATTERNS tag is empty or if none of the +# patterns match the file name, INPUT_FILTER is applied. + +FILTER_PATTERNS = + +# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using +# INPUT_FILTER) will also be used to filter the input files that are used for +# producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES). +# The default value is: NO. + +FILTER_SOURCE_FILES = NO + +# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file +# pattern. A pattern will override the setting for FILTER_PATTERN (if any) and +# it is also possible to disable source filtering for a specific pattern using +# *.ext= (so without naming a filter). 
+# This tag requires that the tag FILTER_SOURCE_FILES is set to YES. + +FILTER_SOURCE_PATTERNS = + +# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that +# is part of the input, its contents will be placed on the main page +# (index.html). This can be useful if you have a project on for instance GitHub +# and want to reuse the introduction page also for the doxygen output. + +USE_MDFILE_AS_MAINPAGE = + +#--------------------------------------------------------------------------- +# Configuration options related to source browsing +#--------------------------------------------------------------------------- + +# If the SOURCE_BROWSER tag is set to YES then a list of source files will be +# generated. Documented entities will be cross-referenced with these sources. +# +# Note: To get rid of all source code in the generated output, make sure that +# also VERBATIM_HEADERS is set to NO. +# The default value is: NO. + +SOURCE_BROWSER = YES + +# Setting the INLINE_SOURCES tag to YES will include the body of functions, +# classes and enums directly into the documentation. +# The default value is: NO. + +INLINE_SOURCES = YES + +# Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any +# special comment blocks from generated source code fragments. Normal C, C++ and +# Fortran comments will always remain visible. +# The default value is: YES. + +STRIP_CODE_COMMENTS = YES + +# If the REFERENCED_BY_RELATION tag is set to YES then for each documented +# function all documented functions referencing it will be listed. +# The default value is: NO. + +REFERENCED_BY_RELATION = YES + +# If the REFERENCES_RELATION tag is set to YES then for each documented function +# all documented entities called/used by that function will be listed. +# The default value is: NO. 
+ +REFERENCES_RELATION = YES + +# If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set +# to YES then the hyperlinks from functions in REFERENCES_RELATION and +# REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will +# link to the documentation. +# The default value is: YES. + +REFERENCES_LINK_SOURCE = YES + +# If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the +# source code will show a tooltip with additional information such as prototype, +# brief description and links to the definition and documentation. Since this +# will make the HTML file larger and loading of large files a bit slower, you +# can opt to disable this feature. +# The default value is: YES. +# This tag requires that the tag SOURCE_BROWSER is set to YES. + +SOURCE_TOOLTIPS = YES + +# If the USE_HTAGS tag is set to YES then the references to source code will +# point to the HTML generated by the htags(1) tool instead of doxygen built-in +# source browser. The htags tool is part of GNU's global source tagging system +# (see http://www.gnu.org/software/global/global.html). You will need version +# 4.8.6 or higher. +# +# To use it do the following: +# - Install the latest version of global +# - Enable SOURCE_BROWSER and USE_HTAGS in the config file +# - Make sure the INPUT points to the root of the source tree +# - Run doxygen as normal +# +# Doxygen will invoke htags (and that will in turn invoke gtags), so these +# tools must be available from the command line (i.e. in the search path). +# +# The result: instead of the source browser generated by doxygen, the links to +# source code will now point to the output of htags. +# The default value is: NO. +# This tag requires that the tag SOURCE_BROWSER is set to YES. + +USE_HTAGS = NO + +# If the VERBATIM_HEADERS tag is set the YES then doxygen will generate a +# verbatim copy of the header file for each class for which an include is +# specified. Set to NO to disable this. 
+# See also: Section \class. +# The default value is: YES. + +VERBATIM_HEADERS = YES + +# If the CLANG_ASSISTED_PARSING tag is set to YES then doxygen will use the +# clang parser (see: http://clang.llvm.org/) for more accurate parsing at the +# cost of reduced performance. This can be particularly helpful with template +# rich C++ code for which doxygen's built-in parser lacks the necessary type +# information. +# Note: The availability of this option depends on whether or not doxygen was +# compiled with the --with-libclang option. +# The default value is: NO. + +#CLANG_ASSISTED_PARSING = NO + +# If clang assisted parsing is enabled you can provide the compiler with command +# line options that you would normally use when invoking the compiler. Note that +# the include paths will already be set by doxygen for the files and directories +# specified with INPUT and INCLUDE_PATH. +# This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES. + +#CLANG_OPTIONS = + +#--------------------------------------------------------------------------- +# Configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- + +# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all +# compounds will be generated. Enable this if the project contains a lot of +# classes, structs, unions or interfaces. +# The default value is: YES. + +ALPHABETICAL_INDEX = YES + +# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in +# which the alphabetical index list will be split. +# Minimum value: 1, maximum value: 20, default value: 5. +# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. + +COLS_IN_ALPHA_INDEX = 5 + +# In case all classes in a project start with a common prefix, all classes will +# be put under the same header in the alphabetical index. 
The IGNORE_PREFIX tag +# can be used to specify a prefix (or a list of prefixes) that should be ignored +# while generating the index headers. +# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. + +IGNORE_PREFIX = + +#--------------------------------------------------------------------------- +# Configuration options related to the HTML output +#--------------------------------------------------------------------------- + +# If the GENERATE_HTML tag is set to YES, doxygen will generate HTML output +# The default value is: YES. + +GENERATE_HTML = YES + +# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a +# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of +# it. +# The default directory is: html. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_OUTPUT = html + +# The HTML_FILE_EXTENSION tag can be used to specify the file extension for each +# generated HTML page (for example: .htm, .php, .asp). +# The default value is: .html. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_FILE_EXTENSION = .xhtml + +# The HTML_HEADER tag can be used to specify a user-defined HTML header file for +# each generated HTML page. If the tag is left blank doxygen will generate a +# standard header. +# +# To get valid HTML the header file that includes any scripts and style sheets +# that doxygen needs, which is dependent on the configuration options used (e.g. +# the setting GENERATE_TREEVIEW). It is highly recommended to start with a +# default header using +# doxygen -w html new_header.html new_footer.html new_stylesheet.css +# YourConfigFile +# and then modify the file new_header.html. See also section "Doxygen usage" +# for information on how to generate the default header that doxygen normally +# uses. +# Note: The header is subject to change so you typically have to regenerate the +# default header when upgrading to a newer version of doxygen. 
For a description +# of the possible markers and block names see the documentation. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_HEADER = ./docs/header.html + +# The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each +# generated HTML page. If the tag is left blank doxygen will generate a standard +# footer. See HTML_HEADER for more information on how to generate a default +# footer and what special commands can be used inside the footer. See also +# section "Doxygen usage" for information on how to generate the default footer +# that doxygen normally uses. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_FOOTER = + +# The HTML_STYLESHEET tag can be used to specify a user-defined cascading style +# sheet that is used by each HTML page. It can be used to fine-tune the look of +# the HTML output. If left blank doxygen will generate a default style sheet. +# See also section "Doxygen usage" for information on how to generate the style +# sheet that doxygen normally uses. +# Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as +# it is more robust and this tag (HTML_STYLESHEET) will in the future become +# obsolete. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_STYLESHEET = + +# The HTML_EXTRA_STYLESHEET tag can be used to specify additional user-defined +# cascading style sheets that are included after the standard style sheets +# created by doxygen. Using this option one can overrule certain style aspects. +# This is preferred over using HTML_STYLESHEET since it does not replace the +# standard style sheet and is therefore more robust against future updates. +# Doxygen will copy the style sheet files to the output directory. +# Note: The order of the extra style sheet files is of importance (e.g. the last +# style sheet in the list overrules the setting of the previous ones in the +# list). For an example see the documentation. 
+# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_EXTRA_STYLESHEET = + +# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or +# other source files which should be copied to the HTML output directory. Note +# that these files will be copied to the base HTML output directory. Use the +# $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these +# files. In the HTML_STYLESHEET file, use the file name only. Also note that the +# files will be copied as-is; there are no commands or markers available. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_EXTRA_FILES = + +# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen +# will adjust the colors in the style sheet and background images according to +# this color. Hue is specified as an angle on a colorwheel, see +# http://en.wikipedia.org/wiki/Hue for more information. For instance the value +# 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 +# purple, and 360 is red again. +# Minimum value: 0, maximum value: 359, default value: 220. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE_HUE = 220 + +# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors +# in the HTML output. For a value of 0 the output will use grayscales only. A +# value of 255 will produce the most vivid colors. +# Minimum value: 0, maximum value: 255, default value: 100. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE_SAT = 100 + +# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the +# luminance component of the colors in the HTML output. Values below 100 +# gradually make the output lighter, whereas values above 100 make the output +# darker. The value divided by 100 is the actual gamma applied, so 80 represents +# a gamma of 0.8, The value 220 represents a gamma of 2.2, and 100 does not +# change the gamma. 
+# Minimum value: 40, maximum value: 240, default value: 80. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE_GAMMA = 80 + +# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML +# page will contain the date and time when the page was generated. Setting this +# to NO can help when comparing the output of multiple runs. +# The default value is: YES. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_TIMESTAMP = YES + +# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML +# documentation will contain sections that can be hidden and shown after the +# page has loaded. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_DYNAMIC_SECTIONS = NO + +# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries +# shown in the various tree structured indices initially; the user can expand +# and collapse entries dynamically later on. Doxygen will expand the tree to +# such a level that at most the specified number of entries are visible (unless +# a fully collapsed tree already exceeds this amount). So setting the number of +# entries 1 will produce a full collapsed tree by default. 0 is a special value +# representing an infinite number of entries and will result in a full expanded +# tree by default. +# Minimum value: 0, maximum value: 9999, default value: 100. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_INDEX_NUM_ENTRIES = 100 + +# If the GENERATE_DOCSET tag is set to YES, additional index files will be +# generated that can be used as input for Apple's Xcode 3 integrated development +# environment (see: http://developer.apple.com/tools/xcode/), introduced with +# OSX 10.5 (Leopard). To create a documentation set, doxygen will generate a +# Makefile in the HTML output directory. 
Running make will produce the docset in +# that directory and running make install will install the docset in +# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at +# startup. See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html +# for more information. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_DOCSET = NO + +# This tag determines the name of the docset feed. A documentation feed provides +# an umbrella under which multiple documentation sets from a single provider +# (such as a company or product suite) can be grouped. +# The default value is: Doxygen generated docs. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_FEEDNAME = "Doxygen generated docs" + +# This tag specifies a string that should uniquely identify the documentation +# set bundle. This should be a reverse domain-name style string, e.g. +# com.mycompany.MyDocSet. Doxygen will append .docset to the name. +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_BUNDLE_ID = org.doxygen.Project + +# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify +# the documentation publisher. This should be a reverse domain-name style +# string, e.g. com.mycompany.MyDocSet.documentation. +# The default value is: org.doxygen.Publisher. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_PUBLISHER_ID = org.doxygen.Publisher + +# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. +# The default value is: Publisher. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_PUBLISHER_NAME = Publisher + +# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three +# additional HTML index files: index.hhp, index.hhc, and index.hhk. 
The +# index.hhp is a project file that can be read by Microsoft's HTML Help Workshop +# (see: http://www.microsoft.com/en-us/download/details.aspx?id=21138) on +# Windows. +# +# The HTML Help Workshop contains a compiler that can convert all HTML output +# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML +# files are now used as the Windows 98 help format, and will replace the old +# Windows help format (.hlp) on all Windows platforms in the future. Compressed +# HTML files also contain an index, a table of contents, and you can search for +# words in the documentation. The HTML workshop also contains a viewer for +# compressed HTML files. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_HTMLHELP = NO + +# The CHM_FILE tag can be used to specify the file name of the resulting .chm +# file. You can add a path in front of the file if the result should not be +# written to the html output directory. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +CHM_FILE = + +# The HHC_LOCATION tag can be used to specify the location (absolute path +# including file name) of the HTML help compiler (hhc.exe). If non-empty, +# doxygen will try to run the HTML help compiler on the generated index.hhp. +# The file has to be specified with full path. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +HHC_LOCATION = + +# The GENERATE_CHI flag controls if a separate .chi index file is generated +# (YES) or that it should be included in the master .chm file (NO). +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +GENERATE_CHI = NO + +# The CHM_INDEX_ENCODING is used to encode HtmlHelp index (hhk), content (hhc) +# and project file content. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. 
+ +CHM_INDEX_ENCODING = + +# The BINARY_TOC flag controls whether a binary table of contents is generated +# (YES) or a normal table of contents (NO) in the .chm file. Furthermore it +# enables the Previous and Next buttons. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +BINARY_TOC = NO + +# The TOC_EXPAND flag can be set to YES to add extra items for group members to +# the table of contents of the HTML help documentation and to the tree view. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +TOC_EXPAND = NO + +# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and +# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that +# can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help +# (.qch) of the generated HTML documentation. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_QHP = NO + +# If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify +# the file name of the resulting .qch file. The path specified is relative to +# the HTML output folder. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QCH_FILE = + +# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help +# Project output. For more information please see Qt Help Project / Namespace +# (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#namespace). +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_NAMESPACE = org.doxygen.Project + +# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt +# Help Project output. For more information please see Qt Help Project / Virtual +# Folders (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#virtual- +# folders). +# The default value is: doc. +# This tag requires that the tag GENERATE_QHP is set to YES. 
+ +QHP_VIRTUAL_FOLDER = doc + +# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom +# filter to add. For more information please see Qt Help Project / Custom +# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- +# filters). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_CUST_FILTER_NAME = + +# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the +# custom filter to add. For more information please see Qt Help Project / Custom +# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- +# filters). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_CUST_FILTER_ATTRS = + +# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this +# project's filter section matches. Qt Help Project / Filter Attributes (see: +# http://qt-project.org/doc/qt-4.8/qthelpproject.html#filter-attributes). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_SECT_FILTER_ATTRS = + +# The QHG_LOCATION tag can be used to specify the location of Qt's +# qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the +# generated .qhp file. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHG_LOCATION = + +# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be +# generated, together with the HTML files, they form an Eclipse help plugin. To +# install this plugin and make it available under the help contents menu in +# Eclipse, the contents of the directory containing the HTML and XML files needs +# to be copied into the plugins directory of eclipse. The name of the directory +# within the plugins directory should be the same as the ECLIPSE_DOC_ID value. +# After copying Eclipse needs to be restarted before the help appears. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. 
+ +GENERATE_ECLIPSEHELP = NO + +# A unique identifier for the Eclipse help plugin. When installing the plugin +# the directory name containing the HTML and XML files should also have this +# name. Each documentation set should have its own identifier. +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES. + +ECLIPSE_DOC_ID = org.doxygen.Project + +# If you want full control over the layout of the generated HTML pages it might +# be necessary to disable the index and replace it with your own. The +# DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top +# of each HTML page. A value of NO enables the index and the value YES disables +# it. Since the tabs in the index contain the same information as the navigation +# tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +DISABLE_INDEX = NO + +# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index +# structure should be generated to display hierarchical information. If the tag +# value is set to YES, a side panel will be generated containing a tree-like +# index structure (just like the one that is generated for HTML Help). For this +# to work a browser that supports JavaScript, DHTML, CSS and frames is required +# (i.e. any modern browser). Windows users are probably better off using the +# HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can +# further fine-tune the look of the index. As an example, the default style +# sheet generated by doxygen has an example that shows how to put an image at +# the root of the tree instead of the PROJECT_NAME. Since the tree basically has +# the same information as the tab index, you could consider setting +# DISABLE_INDEX to YES when enabling this option. +# The default value is: NO. 
+# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_TREEVIEW = YES + +# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that +# doxygen will group on one line in the generated HTML documentation. +# +# Note that a value of 0 will completely suppress the enum values from appearing +# in the overview section. +# Minimum value: 0, maximum value: 20, default value: 4. +# This tag requires that the tag GENERATE_HTML is set to YES. + +ENUM_VALUES_PER_LINE = 4 + +# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used +# to set the initial width (in pixels) of the frame in which the tree is shown. +# Minimum value: 0, maximum value: 1500, default value: 250. +# This tag requires that the tag GENERATE_HTML is set to YES. + +TREEVIEW_WIDTH = 250 + +# If the EXT_LINKS_IN_WINDOW option is set to YES, doxygen will open links to +# external symbols imported via tag files in a separate window. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +EXT_LINKS_IN_WINDOW = NO + +# Use this tag to change the font size of LaTeX formulas included as images in +# the HTML documentation. When you change the font size after a successful +# doxygen run you need to manually remove any form_*.png images from the HTML +# output directory to force them to be regenerated. +# Minimum value: 8, maximum value: 50, default value: 10. +# This tag requires that the tag GENERATE_HTML is set to YES. + +FORMULA_FONTSIZE = 10 + +# Use the FORMULA_TRANSPARENT tag to determine whether or not the images +# generated for formulas are transparent PNGs. Transparent PNGs are not +# supported properly for IE 6.0, but are supported on all modern browsers. +# +# Note that when changing this option you need to delete any form_*.png files in +# the HTML output directory before the changes have effect. +# The default value is: YES. +# This tag requires that the tag GENERATE_HTML is set to YES. 
+ +FORMULA_TRANSPARENT = YES + +# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see +# http://www.mathjax.org) which uses client side Javascript for the rendering +# instead of using pre-rendered bitmaps. Use this if you do not have LaTeX +# installed or if you want the formulas to look prettier in the HTML output. When +# enabled you may also need to install MathJax separately and configure the path +# to it using the MATHJAX_RELPATH option. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +USE_MATHJAX = YES + +# When MathJax is enabled you can set the default output format to be used for +# the MathJax output. See the MathJax site (see: +# http://docs.mathjax.org/en/latest/output.html) for more details. +# Possible values are: HTML-CSS (which is slower, but has the best +# compatibility), NativeMML (i.e. MathML) and SVG. +# The default value is: HTML-CSS. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_FORMAT = HTML-CSS + +# When MathJax is enabled you need to specify the location relative to the HTML +# output directory using the MATHJAX_RELPATH option. The destination directory +# should contain the MathJax.js script. For instance, if the mathjax directory +# is located at the same level as the HTML output directory, then +# MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax +# Content Delivery Network so you can quickly see the result without installing +# MathJax. However, it is strongly recommended to install a local copy of +# MathJax from http://www.mathjax.org before deployment. +# The default value is: http://cdn.mathjax.org/mathjax/latest. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest + +# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax +# extension names that should be enabled during MathJax rendering. 
For example +# MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_EXTENSIONS = + +# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces +# of code that will be used on startup of the MathJax code. See the MathJax site +# (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an +# example see the documentation. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_CODEFILE = + +# When the SEARCHENGINE tag is enabled doxygen will generate a search box for +# the HTML output. The underlying search engine uses javascript and DHTML and +# should work on any modern browser. Note that when using HTML help +# (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET) +# there is already a search function so this one should typically be disabled. +# For large projects the javascript based search engine can be slow, then +# enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to +# search using the keyboard; to jump to the search box use <access key> + S +# (what the <access key> is depends on the OS and browser, but it is typically +# <CTRL>, <ALT>/