From 27e67f0b2047cfa2f011f9e242e3068d9e106b39 Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Tue, 16 Feb 2021 11:34:39 +0000 Subject: Remove Compute Vision Neon support Resolves COMPMID-4150 Change-Id: I316e8ab97de796666c71eadfde894715fcf4a1aa Signed-off-by: Michalis Spyrou Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5141 Tested-by: Arm Jenkins Reviewed-by: Michele Di Giorgio Comments-Addressed: Arm Jenkins --- Android.bp | 72 -- arm_compute/runtime/NEON/NEFunctions.h | 41 - .../runtime/NEON/functions/NEAbsoluteDifference.h | 65 -- arm_compute/runtime/NEON/functions/NEAccumulate.h | 122 --- arm_compute/runtime/NEON/functions/NEBox3x3.h | 60 -- arm_compute/runtime/NEON/functions/NECannyEdge.h | 106 -- .../runtime/NEON/functions/NEChannelCombine.h | 62 -- .../runtime/NEON/functions/NEChannelExtract.h | 60 -- .../runtime/NEON/functions/NEColorConvert.h | 71 -- arm_compute/runtime/NEON/functions/NEDerivative.h | 82 -- arm_compute/runtime/NEON/functions/NEDilate.h | 57 - .../runtime/NEON/functions/NEEqualizeHistogram.h | 89 -- arm_compute/runtime/NEON/functions/NEErode.h | 57 - arm_compute/runtime/NEON/functions/NEFastCorners.h | 96 -- arm_compute/runtime/NEON/functions/NEGaussian3x3.h | 57 - arm_compute/runtime/NEON/functions/NEGaussian5x5.h | 88 -- .../runtime/NEON/functions/NEGaussianPyramid.h | 154 --- .../runtime/NEON/functions/NEHOGDescriptor.h | 90 -- arm_compute/runtime/NEON/functions/NEHOGDetector.h | 73 -- arm_compute/runtime/NEON/functions/NEHOGGradient.h | 88 -- .../runtime/NEON/functions/NEHOGMultiDetection.h | 119 --- .../runtime/NEON/functions/NEHarrisCorners.h | 119 --- arm_compute/runtime/NEON/functions/NEHistogram.h | 80 -- .../runtime/NEON/functions/NEIntegralImage.h | 61 -- .../runtime/NEON/functions/NELaplacianPyramid.h | 98 -- .../NEON/functions/NELaplacianReconstruct.h | 104 -- arm_compute/runtime/NEON/functions/NEMagnitude.h | 64 -- arm_compute/runtime/NEON/functions/NEMeanStdDev.h | 77 -- arm_compute/runtime/NEON/functions/NEMedian3x3.h | 58 - .../runtime/NEON/functions/NEMinMaxLocation.h | 86 -- .../runtime/NEON/functions/NENonLinearFilter.h | 64 -- arm_compute/runtime/NEON/functions/NEOpticalFlow.h | 108 -- arm_compute/runtime/NEON/functions/NEPhase.h | 53 - arm_compute/runtime/NEON/functions/NERemap.h | 63 -- arm_compute/runtime/NEON/functions/NEScharr3x3.h | 61 -- arm_compute/runtime/NEON/functions/NESobel3x3.h | 61 -- arm_compute/runtime/NEON/functions/NESobel5x5.h | 92 -- arm_compute/runtime/NEON/functions/NESobel7x7.h | 92 -- arm_compute/runtime/NEON/functions/NETableLookup.h | 47 - arm_compute/runtime/NEON/functions/NEThreshold.h | 65 -- arm_compute/runtime/NEON/functions/NEWarpAffine.h | 57 - .../runtime/NEON/functions/NEWarpPerspective.h | 56 - docs/00_introduction.dox | 68 +- docs/01_library.dox | 10 +- docs/06_functions_list.dox | 44 - examples/neon_cartoon_effect.cpp | 115 -- examples/neon_opticalflow.cpp | 263 ----- examples/neoncl_scale_median_gaussian.cpp | 142 --- src/core/NEON/NEKernels.h | 33 - .../NEON/kernels/NEAbsoluteDifferenceKernel.cpp | 209 ---- src/core/NEON/kernels/NEAbsoluteDifferenceKernel.h | 86 -- src/core/NEON/kernels/NEAccumulateKernel.cpp | 359 ------- src/core/NEON/kernels/NEAccumulateKernel.h | 183 ---- src/core/NEON/kernels/NEBox3x3Kernel.cpp | 194 ---- src/core/NEON/kernels/NEBox3x3Kernel.h | 95 -- src/core/NEON/kernels/NECannyEdgeKernel.cpp | 1122 -------------------- src/core/NEON/kernels/NECannyEdgeKernel.h | 189 ---- src/core/NEON/kernels/NEChannelCombineKernel.cpp | 456 -------- src/core/NEON/kernels/NEChannelCombineKernel.h | 129 --- src/core/NEON/kernels/NEChannelExtractKernel.cpp | 269 ----- src/core/NEON/kernels/NEChannelExtractKernel.h | 111 -- src/core/NEON/kernels/NEColorConvertKernel.cpp | 590 ---------- src/core/NEON/kernels/NEColorConvertKernel.h | 93 -- src/core/NEON/kernels/NEDerivativeKernel.cpp | 231 ---- src/core/NEON/kernels/NEDerivativeKernel.h | 100 -- src/core/NEON/kernels/NEDilateKernel.cpp | 128 --- src/core/NEON/kernels/NEDilateKernel.h | 65 -- src/core/NEON/kernels/NEErodeKernel.cpp | 128 --- src/core/NEON/kernels/NEErodeKernel.h | 65 -- src/core/NEON/kernels/NEFastCornersKernel.cpp | 475 --------- src/core/NEON/kernels/NEFastCornersKernel.h | 78 -- src/core/NEON/kernels/NEGaussian3x3Kernel.cpp | 135 --- src/core/NEON/kernels/NEGaussian3x3Kernel.h | 66 -- src/core/NEON/kernels/NEGaussian5x5Kernel.cpp | 211 ---- src/core/NEON/kernels/NEGaussian5x5Kernel.h | 103 -- src/core/NEON/kernels/NEGaussianPyramidKernel.cpp | 272 ----- src/core/NEON/kernels/NEGaussianPyramidKernel.h | 105 -- src/core/NEON/kernels/NEHOGDescriptorKernel.cpp | 806 -------------- src/core/NEON/kernels/NEHOGDescriptorKernel.h | 149 --- src/core/NEON/kernels/NEHOGDetectorKernel.cpp | 189 ---- src/core/NEON/kernels/NEHOGDetectorKernel.h | 89 -- src/core/NEON/kernels/NEHarrisCornersKernel.cpp | 817 -------------- src/core/NEON/kernels/NEHarrisCornersKernel.h | 105 -- src/core/NEON/kernels/NEHistogramKernel.cpp | 249 ----- src/core/NEON/kernels/NEHistogramKernel.h | 135 --- src/core/NEON/kernels/NEIntegralImageKernel.cpp | 144 --- src/core/NEON/kernels/NEIntegralImageKernel.h | 66 -- src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp | 490 --------- src/core/NEON/kernels/NEMagnitudePhaseKernel.h | 101 -- src/core/NEON/kernels/NEMeanStdDevKernel.cpp | 162 --- src/core/NEON/kernels/NEMeanStdDevKernel.h | 83 -- src/core/NEON/kernels/NEMedian3x3Kernel.cpp | 137 --- src/core/NEON/kernels/NEMedian3x3Kernel.h | 66 -- src/core/NEON/kernels/NEMinMaxLocationKernel.cpp | 478 --------- src/core/NEON/kernels/NEMinMaxLocationKernel.h | 171 --- src/core/NEON/kernels/NENonLinearFilterKernel.cpp | 1018 ------------------ src/core/NEON/kernels/NENonLinearFilterKernel.h | 153 --- .../NEON/kernels/NENonMaximaSuppression3x3Kernel.h | 1 - src/core/NEON/kernels/NERemapKernel.cpp | 237 ----- src/core/NEON/kernels/NERemapKernel.h | 83 -- src/core/NEON/kernels/NEScharr3x3Kernel.cpp | 262 ----- src/core/NEON/kernels/NEScharr3x3Kernel.h | 86 -- src/core/NEON/kernels/NESobel3x3Kernel.cpp | 272 ----- src/core/NEON/kernels/NESobel3x3Kernel.h | 86 -- src/core/NEON/kernels/NESobel5x5Kernel.cpp | 406 ------- src/core/NEON/kernels/NESobel5x5Kernel.h | 126 --- src/core/NEON/kernels/NESobel7x7Kernel.cpp | 524 --------- src/core/NEON/kernels/NESobel7x7Kernel.h | 130 --- src/core/NEON/kernels/NETableLookupKernel.cpp | 143 --- src/core/NEON/kernels/NETableLookupKernel.h | 82 -- src/core/NEON/kernels/NEThresholdKernel.cpp | 217 ---- src/core/NEON/kernels/NEThresholdKernel.h | 88 -- src/core/NEON/kernels/NEWarpKernel.cpp | 807 -------------- src/core/NEON/kernels/NEWarpKernel.h | 131 --- .../NEON/functions/NEAbsoluteDifference.cpp | 40 - src/runtime/NEON/functions/NEAccumulate.cpp | 67 -- src/runtime/NEON/functions/NEBox3x3.cpp | 52 - src/runtime/NEON/functions/NECannyEdge.cpp | 196 ---- src/runtime/NEON/functions/NEChannelCombine.cpp | 44 - src/runtime/NEON/functions/NEChannelExtract.cpp | 44 - src/runtime/NEON/functions/NEColorConvert.cpp | 58 - src/runtime/NEON/functions/NEDerivative.cpp | 59 - src/runtime/NEON/functions/NEDilate.cpp | 43 - src/runtime/NEON/functions/NEEqualizeHistogram.cpp | 73 -- src/runtime/NEON/functions/NEErode.cpp | 44 - src/runtime/NEON/functions/NEFastCorners.cpp | 116 -- src/runtime/NEON/functions/NEGaussian3x3.cpp | 44 - src/runtime/NEON/functions/NEGaussian5x5.cpp | 74 -- src/runtime/NEON/functions/NEGaussianPyramid.cpp | 202 ---- src/runtime/NEON/functions/NEHOGDescriptor.cpp | 116 -- src/runtime/NEON/functions/NEHOGDetector.cpp | 38 - src/runtime/NEON/functions/NEHOGGradient.cpp | 94 -- src/runtime/NEON/functions/NEHOGMultiDetection.cpp | 270 ----- src/runtime/NEON/functions/NEHarrisCorners.cpp | 217 ---- src/runtime/NEON/functions/NEHistogram.cpp | 62 -- src/runtime/NEON/functions/NEIntegralImage.cpp | 46 - src/runtime/NEON/functions/NELaplacianPyramid.cpp | 112 -- .../NEON/functions/NELaplacianReconstruct.cpp | 106 -- src/runtime/NEON/functions/NEMagnitude.cpp | 50 - src/runtime/NEON/functions/NEMeanStdDev.cpp | 56 - src/runtime/NEON/functions/NEMedian3x3.cpp | 44 - src/runtime/NEON/functions/NEMinMaxLocation.cpp | 57 - src/runtime/NEON/functions/NENonLinearFilter.cpp | 46 - src/runtime/NEON/functions/NEOpticalFlow.cpp | 143 --- src/runtime/NEON/functions/NEPhase.cpp | 47 - src/runtime/NEON/functions/NERemap.cpp | 55 - src/runtime/NEON/functions/NEScharr3x3.cpp | 43 - src/runtime/NEON/functions/NESobel3x3.cpp | 44 - src/runtime/NEON/functions/NESobel5x5.cpp | 97 -- src/runtime/NEON/functions/NESobel7x7.cpp | 96 -- src/runtime/NEON/functions/NETableLookup.cpp | 37 - src/runtime/NEON/functions/NEThreshold.cpp | 43 - src/runtime/NEON/functions/NEWarpAffine.cpp | 64 -- src/runtime/NEON/functions/NEWarpPerspective.cpp | 65 -- tests/validation/NEON/AbsoluteDifference.cpp | 92 -- tests/validation/NEON/Accumulate.cpp | 119 --- tests/validation/NEON/Box3x3.cpp | 75 -- tests/validation/NEON/CannyEdge.cpp | 77 -- tests/validation/NEON/ChannelCombine.cpp | 112 -- tests/validation/NEON/ChannelExtract.cpp | 106 -- tests/validation/NEON/ColorConvert.cpp | 240 ----- tests/validation/NEON/Derivative.cpp | 75 -- tests/validation/NEON/Dilate.cpp | 75 -- tests/validation/NEON/EqualizeHistogram.cpp | 62 -- tests/validation/NEON/Erode.cpp | 75 -- tests/validation/NEON/FastCorners.cpp | 78 -- tests/validation/NEON/Gaussian3x3.cpp | 75 -- tests/validation/NEON/Gaussian5x5.cpp | 75 -- tests/validation/NEON/GaussianPyramid.cpp | 89 -- tests/validation/NEON/HOGDescriptor.cpp | 84 -- tests/validation/NEON/HOGDetector.cpp | 98 -- tests/validation/NEON/HOGMultiDetection.cpp | 96 -- tests/validation/NEON/HarrisCorners.cpp | 93 -- tests/validation/NEON/Histogram.cpp | 68 -- tests/validation/NEON/IntegralImage.cpp | 61 -- tests/validation/NEON/LaplacianPyramid.cpp | 109 -- tests/validation/NEON/LaplacianReconstruct.cpp | 100 -- tests/validation/NEON/Magnitude.cpp | 76 -- tests/validation/NEON/MeanStdDev.cpp | 73 -- tests/validation/NEON/Median3x3.cpp | 75 -- tests/validation/NEON/MinMaxLocation.cpp | 98 -- tests/validation/NEON/NonLinearFilter.cpp | 73 -- tests/validation/NEON/OpticalFlow.cpp | 92 -- tests/validation/NEON/Phase.cpp | 71 -- tests/validation/NEON/Remap.cpp | 77 -- tests/validation/NEON/Schaar.cpp | 76 -- tests/validation/NEON/Sobel.cpp | 256 ----- tests/validation/NEON/TableLookup.cpp | 83 -- tests/validation/NEON/Threshold.cpp | 66 -- tests/validation/NEON/WarpAffine.cpp | 79 -- tests/validation/NEON/WarpPerspective.cpp | 75 -- 191 files changed, 59 insertions(+), 26035 deletions(-) delete mode 100644 arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h delete mode 100644 arm_compute/runtime/NEON/functions/NEAccumulate.h delete mode 100644 arm_compute/runtime/NEON/functions/NEBox3x3.h delete mode 100644 arm_compute/runtime/NEON/functions/NECannyEdge.h delete mode 100644 arm_compute/runtime/NEON/functions/NEChannelCombine.h delete mode 100644 arm_compute/runtime/NEON/functions/NEChannelExtract.h delete mode 100644 arm_compute/runtime/NEON/functions/NEColorConvert.h delete mode 100644 arm_compute/runtime/NEON/functions/NEDerivative.h delete mode 100644 arm_compute/runtime/NEON/functions/NEDilate.h delete mode 100644 arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h delete mode 100644 arm_compute/runtime/NEON/functions/NEErode.h delete mode 100644 arm_compute/runtime/NEON/functions/NEFastCorners.h delete mode 100644 arm_compute/runtime/NEON/functions/NEGaussian3x3.h delete mode 100644 arm_compute/runtime/NEON/functions/NEGaussian5x5.h delete mode 100644 arm_compute/runtime/NEON/functions/NEGaussianPyramid.h delete mode 100644 arm_compute/runtime/NEON/functions/NEHOGDescriptor.h delete mode 100644 arm_compute/runtime/NEON/functions/NEHOGDetector.h delete mode 100644 arm_compute/runtime/NEON/functions/NEHOGGradient.h delete mode 100644 arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h delete mode 100644 arm_compute/runtime/NEON/functions/NEHarrisCorners.h delete mode 100644 arm_compute/runtime/NEON/functions/NEHistogram.h delete mode 100644 arm_compute/runtime/NEON/functions/NEIntegralImage.h delete mode 100644 arm_compute/runtime/NEON/functions/NELaplacianPyramid.h delete mode 100644 arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h delete mode 100644 arm_compute/runtime/NEON/functions/NEMagnitude.h delete mode 100644 arm_compute/runtime/NEON/functions/NEMeanStdDev.h delete mode 100644 arm_compute/runtime/NEON/functions/NEMedian3x3.h delete mode 100644 arm_compute/runtime/NEON/functions/NEMinMaxLocation.h delete mode 100644 arm_compute/runtime/NEON/functions/NENonLinearFilter.h delete mode 100644 arm_compute/runtime/NEON/functions/NEOpticalFlow.h delete mode 100644 arm_compute/runtime/NEON/functions/NEPhase.h delete mode 100644 arm_compute/runtime/NEON/functions/NERemap.h delete mode 100644 arm_compute/runtime/NEON/functions/NEScharr3x3.h delete mode 100644 arm_compute/runtime/NEON/functions/NESobel3x3.h delete mode 100644 arm_compute/runtime/NEON/functions/NESobel5x5.h delete mode 100644 arm_compute/runtime/NEON/functions/NESobel7x7.h delete mode 100644 arm_compute/runtime/NEON/functions/NETableLookup.h delete mode 100644 arm_compute/runtime/NEON/functions/NEThreshold.h delete mode 100644 arm_compute/runtime/NEON/functions/NEWarpAffine.h delete mode 100644 arm_compute/runtime/NEON/functions/NEWarpPerspective.h delete mode 100644 examples/neon_cartoon_effect.cpp delete mode 100644 examples/neon_opticalflow.cpp delete mode 100644 examples/neoncl_scale_median_gaussian.cpp delete mode 100644 src/core/NEON/kernels/NEAbsoluteDifferenceKernel.cpp delete mode 100644 src/core/NEON/kernels/NEAbsoluteDifferenceKernel.h delete mode 100644 src/core/NEON/kernels/NEAccumulateKernel.cpp delete mode 100644 src/core/NEON/kernels/NEAccumulateKernel.h delete mode 100644 src/core/NEON/kernels/NEBox3x3Kernel.cpp delete mode 100644 src/core/NEON/kernels/NEBox3x3Kernel.h delete mode 100644 src/core/NEON/kernels/NECannyEdgeKernel.cpp delete mode 100644 src/core/NEON/kernels/NECannyEdgeKernel.h delete mode 100644 src/core/NEON/kernels/NEChannelCombineKernel.cpp delete mode 100644 src/core/NEON/kernels/NEChannelCombineKernel.h delete mode 100644 src/core/NEON/kernels/NEChannelExtractKernel.cpp delete mode 100644 src/core/NEON/kernels/NEChannelExtractKernel.h delete mode 100644 src/core/NEON/kernels/NEColorConvertKernel.cpp delete mode 100644 src/core/NEON/kernels/NEColorConvertKernel.h delete mode 100644 src/core/NEON/kernels/NEDerivativeKernel.cpp delete mode 100644 src/core/NEON/kernels/NEDerivativeKernel.h delete mode 100644 src/core/NEON/kernels/NEDilateKernel.cpp delete mode 100644 src/core/NEON/kernels/NEDilateKernel.h delete mode 100644 src/core/NEON/kernels/NEErodeKernel.cpp delete mode 100644 src/core/NEON/kernels/NEErodeKernel.h delete mode 100644 src/core/NEON/kernels/NEFastCornersKernel.cpp delete mode 100644 src/core/NEON/kernels/NEFastCornersKernel.h delete mode 100644 src/core/NEON/kernels/NEGaussian3x3Kernel.cpp delete mode 100644 src/core/NEON/kernels/NEGaussian3x3Kernel.h delete mode 100644 src/core/NEON/kernels/NEGaussian5x5Kernel.cpp delete mode 100644 src/core/NEON/kernels/NEGaussian5x5Kernel.h delete mode 100644 src/core/NEON/kernels/NEGaussianPyramidKernel.cpp delete mode 100644 src/core/NEON/kernels/NEGaussianPyramidKernel.h delete mode 100644 src/core/NEON/kernels/NEHOGDescriptorKernel.cpp delete mode 100644 src/core/NEON/kernels/NEHOGDescriptorKernel.h delete mode 100644 src/core/NEON/kernels/NEHOGDetectorKernel.cpp delete mode 100644 src/core/NEON/kernels/NEHOGDetectorKernel.h delete mode 100644 src/core/NEON/kernels/NEHarrisCornersKernel.cpp delete mode 100644 src/core/NEON/kernels/NEHarrisCornersKernel.h delete mode 100644 src/core/NEON/kernels/NEHistogramKernel.cpp delete mode 100644 src/core/NEON/kernels/NEHistogramKernel.h delete mode 100644 src/core/NEON/kernels/NEIntegralImageKernel.cpp delete mode 100644 src/core/NEON/kernels/NEIntegralImageKernel.h delete mode 100644 src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp delete mode 100644 src/core/NEON/kernels/NEMagnitudePhaseKernel.h delete mode 100644 src/core/NEON/kernels/NEMeanStdDevKernel.cpp delete mode 100644 src/core/NEON/kernels/NEMeanStdDevKernel.h delete mode 100644 src/core/NEON/kernels/NEMedian3x3Kernel.cpp delete mode 100644 src/core/NEON/kernels/NEMedian3x3Kernel.h delete mode 100644 src/core/NEON/kernels/NEMinMaxLocationKernel.cpp delete mode 100644 src/core/NEON/kernels/NEMinMaxLocationKernel.h delete mode 100644 src/core/NEON/kernels/NENonLinearFilterKernel.cpp delete mode 100644 src/core/NEON/kernels/NENonLinearFilterKernel.h delete mode 100644 src/core/NEON/kernels/NERemapKernel.cpp delete mode 100644 src/core/NEON/kernels/NERemapKernel.h delete mode 100644 src/core/NEON/kernels/NEScharr3x3Kernel.cpp delete mode 100644 src/core/NEON/kernels/NEScharr3x3Kernel.h delete mode 100644 src/core/NEON/kernels/NESobel3x3Kernel.cpp delete mode 100644 src/core/NEON/kernels/NESobel3x3Kernel.h delete mode 100644 src/core/NEON/kernels/NESobel5x5Kernel.cpp delete mode 100644 src/core/NEON/kernels/NESobel5x5Kernel.h delete mode 100644 src/core/NEON/kernels/NESobel7x7Kernel.cpp delete mode 100644 src/core/NEON/kernels/NESobel7x7Kernel.h delete mode 100644 src/core/NEON/kernels/NETableLookupKernel.cpp delete mode 100644 src/core/NEON/kernels/NETableLookupKernel.h delete mode 100644 src/core/NEON/kernels/NEThresholdKernel.cpp delete mode 100644 src/core/NEON/kernels/NEThresholdKernel.h delete mode 100644 src/core/NEON/kernels/NEWarpKernel.cpp delete mode 100644 src/core/NEON/kernels/NEWarpKernel.h delete mode 100644 src/runtime/NEON/functions/NEAbsoluteDifference.cpp delete mode 100644 src/runtime/NEON/functions/NEAccumulate.cpp delete mode 100644 src/runtime/NEON/functions/NEBox3x3.cpp delete mode 100644 src/runtime/NEON/functions/NECannyEdge.cpp delete mode 100644 src/runtime/NEON/functions/NEChannelCombine.cpp delete mode 100644 src/runtime/NEON/functions/NEChannelExtract.cpp delete mode 100644 src/runtime/NEON/functions/NEColorConvert.cpp delete mode 100644 src/runtime/NEON/functions/NEDerivative.cpp delete mode 100644 src/runtime/NEON/functions/NEDilate.cpp delete mode 100644 src/runtime/NEON/functions/NEEqualizeHistogram.cpp delete mode 100644 src/runtime/NEON/functions/NEErode.cpp delete mode 100644 src/runtime/NEON/functions/NEFastCorners.cpp delete mode 100644 src/runtime/NEON/functions/NEGaussian3x3.cpp delete mode 100644 src/runtime/NEON/functions/NEGaussian5x5.cpp delete mode 100644 src/runtime/NEON/functions/NEGaussianPyramid.cpp delete mode 100644 src/runtime/NEON/functions/NEHOGDescriptor.cpp delete mode 100644 src/runtime/NEON/functions/NEHOGDetector.cpp delete mode 100644 src/runtime/NEON/functions/NEHOGGradient.cpp delete mode 100644 src/runtime/NEON/functions/NEHOGMultiDetection.cpp delete mode 100644 src/runtime/NEON/functions/NEHarrisCorners.cpp delete mode 100644 src/runtime/NEON/functions/NEHistogram.cpp delete mode 100644 src/runtime/NEON/functions/NEIntegralImage.cpp delete mode 100644 src/runtime/NEON/functions/NELaplacianPyramid.cpp delete mode 100644 src/runtime/NEON/functions/NELaplacianReconstruct.cpp delete mode 100644 src/runtime/NEON/functions/NEMagnitude.cpp delete mode 100644 src/runtime/NEON/functions/NEMeanStdDev.cpp delete mode 100644 src/runtime/NEON/functions/NEMedian3x3.cpp delete mode 100644 src/runtime/NEON/functions/NEMinMaxLocation.cpp delete mode 100644 src/runtime/NEON/functions/NENonLinearFilter.cpp delete mode 100644 src/runtime/NEON/functions/NEOpticalFlow.cpp delete mode 100644 src/runtime/NEON/functions/NEPhase.cpp delete mode 100644 src/runtime/NEON/functions/NERemap.cpp delete mode 100644 src/runtime/NEON/functions/NEScharr3x3.cpp delete mode 100644 src/runtime/NEON/functions/NESobel3x3.cpp delete mode 100644 src/runtime/NEON/functions/NESobel5x5.cpp delete mode 100644 src/runtime/NEON/functions/NESobel7x7.cpp delete mode 100644 src/runtime/NEON/functions/NETableLookup.cpp delete mode 100644 src/runtime/NEON/functions/NEThreshold.cpp delete mode 100644 src/runtime/NEON/functions/NEWarpAffine.cpp delete mode 100644 src/runtime/NEON/functions/NEWarpPerspective.cpp delete mode 100644 tests/validation/NEON/AbsoluteDifference.cpp delete mode 100644 tests/validation/NEON/Accumulate.cpp delete mode 100644 tests/validation/NEON/Box3x3.cpp delete mode 100644 tests/validation/NEON/CannyEdge.cpp delete mode 100644 tests/validation/NEON/ChannelCombine.cpp delete mode 100644 tests/validation/NEON/ChannelExtract.cpp delete mode 100644 tests/validation/NEON/ColorConvert.cpp delete mode 100644 tests/validation/NEON/Derivative.cpp delete mode 100644 tests/validation/NEON/Dilate.cpp delete mode 100644 tests/validation/NEON/EqualizeHistogram.cpp delete mode 100644 tests/validation/NEON/Erode.cpp delete mode 100644 tests/validation/NEON/FastCorners.cpp delete mode 100644 tests/validation/NEON/Gaussian3x3.cpp delete mode 100644 tests/validation/NEON/Gaussian5x5.cpp delete mode 100644 tests/validation/NEON/GaussianPyramid.cpp delete mode 100644 tests/validation/NEON/HOGDescriptor.cpp delete mode 100644 tests/validation/NEON/HOGDetector.cpp delete mode 100644 tests/validation/NEON/HOGMultiDetection.cpp delete mode 100644 tests/validation/NEON/HarrisCorners.cpp delete mode 100644 tests/validation/NEON/Histogram.cpp delete mode 100644 tests/validation/NEON/IntegralImage.cpp delete mode 100644 tests/validation/NEON/LaplacianPyramid.cpp delete mode 100644 tests/validation/NEON/LaplacianReconstruct.cpp delete mode 100644 tests/validation/NEON/Magnitude.cpp delete mode 100644 tests/validation/NEON/MeanStdDev.cpp delete mode 100644 tests/validation/NEON/Median3x3.cpp delete mode 100644 tests/validation/NEON/MinMaxLocation.cpp delete mode 100644 tests/validation/NEON/NonLinearFilter.cpp delete mode 100644 tests/validation/NEON/OpticalFlow.cpp delete mode 100644 tests/validation/NEON/Phase.cpp delete mode 100644 tests/validation/NEON/Remap.cpp delete mode 100644 tests/validation/NEON/Schaar.cpp delete mode 100644 tests/validation/NEON/Sobel.cpp delete mode 100644 tests/validation/NEON/TableLookup.cpp delete mode 100644 tests/validation/NEON/Threshold.cpp delete mode 100644 tests/validation/NEON/WarpAffine.cpp delete mode 100644 tests/validation/NEON/WarpPerspective.cpp diff --git a/Android.bp b/Android.bp index dc6c702011..6d0d6b6fa6 100644 --- a/Android.bp +++ b/Android.bp @@ -208,8 +208,6 @@ cc_library_static { "src/core/ITensor.cpp", "src/core/ITensorPack.cpp", "src/core/MultiImageInfo.cpp", - "src/core/NEON/kernels/NEAbsoluteDifferenceKernel.cpp", - "src/core/NEON/kernels/NEAccumulateKernel.cpp", "src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp", "src/core/NEON/kernels/NEBatchToSpaceLayerKernel.cpp", "src/core/NEON/kernels/NEBitwiseAndKernel.cpp", @@ -217,13 +215,8 @@ cc_library_static { "src/core/NEON/kernels/NEBitwiseOrKernel.cpp", "src/core/NEON/kernels/NEBitwiseXorKernel.cpp", "src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp", - "src/core/NEON/kernels/NEBox3x3Kernel.cpp", - "src/core/NEON/kernels/NECannyEdgeKernel.cpp", - "src/core/NEON/kernels/NEChannelCombineKernel.cpp", - "src/core/NEON/kernels/NEChannelExtractKernel.cpp", "src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp", "src/core/NEON/kernels/NECol2ImKernel.cpp", - "src/core/NEON/kernels/NEColorConvertKernel.cpp", "src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.cpp", "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.cpp", "src/core/NEON/kernels/NEConvolutionKernel.cpp", @@ -233,15 +226,11 @@ cc_library_static { "src/core/NEON/kernels/NEDepthToSpaceLayerKernel.cpp", "src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp", "src/core/NEON/kernels/NEDequantizationLayerKernel.cpp", - "src/core/NEON/kernels/NEDerivativeKernel.cpp", - "src/core/NEON/kernels/NEDilateKernel.cpp", "src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp", "src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp", - "src/core/NEON/kernels/NEErodeKernel.cpp", "src/core/NEON/kernels/NEFFTDigitReverseKernel.cpp", "src/core/NEON/kernels/NEFFTRadixStageKernel.cpp", "src/core/NEON/kernels/NEFFTScaleKernel.cpp", - "src/core/NEON/kernels/NEFastCornersKernel.cpp", "src/core/NEON/kernels/NEFillArrayKernel.cpp", "src/core/NEON/kernels/NEFillBorderKernel.cpp", "src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp", @@ -258,28 +247,15 @@ cc_library_static { "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp", "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp", "src/core/NEON/kernels/NEGatherKernel.cpp", - "src/core/NEON/kernels/NEGaussian3x3Kernel.cpp", - "src/core/NEON/kernels/NEGaussian5x5Kernel.cpp", - "src/core/NEON/kernels/NEGaussianPyramidKernel.cpp", "src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp", - "src/core/NEON/kernels/NEHOGDescriptorKernel.cpp", - "src/core/NEON/kernels/NEHOGDetectorKernel.cpp", - "src/core/NEON/kernels/NEHarrisCornersKernel.cpp", - "src/core/NEON/kernels/NEHistogramKernel.cpp", "src/core/NEON/kernels/NEIm2ColKernel.cpp", "src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp", - "src/core/NEON/kernels/NEIntegralImageKernel.cpp", "src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp", "src/core/NEON/kernels/NELKTrackerKernel.cpp", "src/core/NEON/kernels/NELogicalKernel.cpp", - "src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp", "src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.cpp", - "src/core/NEON/kernels/NEMeanStdDevKernel.cpp", "src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp", - "src/core/NEON/kernels/NEMedian3x3Kernel.cpp", "src/core/NEON/kernels/NEMinMaxLayerKernel.cpp", - "src/core/NEON/kernels/NEMinMaxLocationKernel.cpp", - "src/core/NEON/kernels/NENonLinearFilterKernel.cpp", "src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp", "src/core/NEON/kernels/NENormalizationLayerKernel.cpp", "src/core/NEON/kernels/NEPadLayerKernel.cpp", @@ -291,24 +267,16 @@ cc_library_static { "src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp", "src/core/NEON/kernels/NERangeKernel.cpp", "src/core/NEON/kernels/NEReductionOperationKernel.cpp", - "src/core/NEON/kernels/NERemapKernel.cpp", "src/core/NEON/kernels/NEReorgLayerKernel.cpp", "src/core/NEON/kernels/NEReverseKernel.cpp", "src/core/NEON/kernels/NEScaleKernel.cpp", - "src/core/NEON/kernels/NEScharr3x3Kernel.cpp", "src/core/NEON/kernels/NESelectKernel.cpp", - "src/core/NEON/kernels/NESobel3x3Kernel.cpp", - "src/core/NEON/kernels/NESobel5x5Kernel.cpp", - "src/core/NEON/kernels/NESobel7x7Kernel.cpp", "src/core/NEON/kernels/NESpaceToBatchLayerKernel.cpp", "src/core/NEON/kernels/NESpaceToDepthLayerKernel.cpp", "src/core/NEON/kernels/NEStackLayerKernel.cpp", "src/core/NEON/kernels/NEStridedSliceKernel.cpp", - "src/core/NEON/kernels/NETableLookupKernel.cpp", - "src/core/NEON/kernels/NEThresholdKernel.cpp", "src/core/NEON/kernels/NETileKernel.cpp", "src/core/NEON/kernels/NETransposeKernel.cpp", - "src/core/NEON/kernels/NEWarpKernel.cpp", "src/core/NEON/kernels/NEWeightsReshapeKernel.cpp", "src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp", "src/core/NEON/kernels/arm_conv/pooling/kernels/cpp_nhwc_1x1_stride_any_depthfirst/generic.cpp", @@ -655,8 +623,6 @@ cc_library_static { "src/runtime/NEON/INEOperator.cpp", "src/runtime/NEON/INESimpleFunction.cpp", "src/runtime/NEON/INESimpleFunctionNoBorder.cpp", - "src/runtime/NEON/functions/NEAbsoluteDifference.cpp", - "src/runtime/NEON/functions/NEAccumulate.cpp", "src/runtime/NEON/functions/NEActivationLayer.cpp", "src/runtime/NEON/functions/NEArgMinMaxLayer.cpp", "src/runtime/NEON/functions/NEArithmeticAddition.cpp", @@ -668,13 +634,8 @@ cc_library_static { "src/runtime/NEON/functions/NEBitwiseOr.cpp", "src/runtime/NEON/functions/NEBitwiseXor.cpp", "src/runtime/NEON/functions/NEBoundingBoxTransform.cpp", - "src/runtime/NEON/functions/NEBox3x3.cpp", - "src/runtime/NEON/functions/NECannyEdge.cpp", "src/runtime/NEON/functions/NECast.cpp", - "src/runtime/NEON/functions/NEChannelCombine.cpp", - "src/runtime/NEON/functions/NEChannelExtract.cpp", "src/runtime/NEON/functions/NEChannelShuffleLayer.cpp", - "src/runtime/NEON/functions/NEColorConvert.cpp", "src/runtime/NEON/functions/NEConcatenateLayer.cpp", "src/runtime/NEON/functions/NEConvertFullyConnectedWeights.cpp", "src/runtime/NEON/functions/NEConvolution.cpp", @@ -686,18 +647,13 @@ cc_library_static { "src/runtime/NEON/functions/NEDepthToSpaceLayer.cpp", "src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp", "src/runtime/NEON/functions/NEDequantizationLayer.cpp", - "src/runtime/NEON/functions/NEDerivative.cpp", "src/runtime/NEON/functions/NEDetectionPostProcessLayer.cpp", - "src/runtime/NEON/functions/NEDilate.cpp", "src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp", "src/runtime/NEON/functions/NEElementwiseOperations.cpp", "src/runtime/NEON/functions/NEElementwiseUnaryLayer.cpp", - "src/runtime/NEON/functions/NEEqualizeHistogram.cpp", - "src/runtime/NEON/functions/NEErode.cpp", "src/runtime/NEON/functions/NEFFT1D.cpp", "src/runtime/NEON/functions/NEFFT2D.cpp", "src/runtime/NEON/functions/NEFFTConvolutionLayer.cpp", - "src/runtime/NEON/functions/NEFastCorners.cpp", "src/runtime/NEON/functions/NEFill.cpp", "src/runtime/NEON/functions/NEFillBorder.cpp", "src/runtime/NEON/functions/NEFlattenLayer.cpp", @@ -711,38 +667,19 @@ cc_library_static { "src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp", "src/runtime/NEON/functions/NEGEMMLowpOutputStage.cpp", "src/runtime/NEON/functions/NEGather.cpp", - "src/runtime/NEON/functions/NEGaussian3x3.cpp", - "src/runtime/NEON/functions/NEGaussian5x5.cpp", - "src/runtime/NEON/functions/NEGaussianPyramid.cpp", "src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp", - "src/runtime/NEON/functions/NEHOGDescriptor.cpp", - "src/runtime/NEON/functions/NEHOGDetector.cpp", - "src/runtime/NEON/functions/NEHOGGradient.cpp", - "src/runtime/NEON/functions/NEHOGMultiDetection.cpp", - "src/runtime/NEON/functions/NEHarrisCorners.cpp", - "src/runtime/NEON/functions/NEHistogram.cpp", "src/runtime/NEON/functions/NEInstanceNormalizationLayer.cpp", - "src/runtime/NEON/functions/NEIntegralImage.cpp", "src/runtime/NEON/functions/NEL2NormalizeLayer.cpp", "src/runtime/NEON/functions/NELSTMLayer.cpp", "src/runtime/NEON/functions/NELSTMLayerQuantized.cpp", - "src/runtime/NEON/functions/NELaplacianPyramid.cpp", - "src/runtime/NEON/functions/NELaplacianReconstruct.cpp", "src/runtime/NEON/functions/NELogical.cpp", - "src/runtime/NEON/functions/NEMagnitude.cpp", "src/runtime/NEON/functions/NEMaxUnpoolingLayer.cpp", - "src/runtime/NEON/functions/NEMeanStdDev.cpp", "src/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.cpp", - "src/runtime/NEON/functions/NEMedian3x3.cpp", - "src/runtime/NEON/functions/NEMinMaxLocation.cpp", - "src/runtime/NEON/functions/NENonLinearFilter.cpp", "src/runtime/NEON/functions/NENonMaximaSuppression3x3.cpp", "src/runtime/NEON/functions/NENormalizationLayer.cpp", - "src/runtime/NEON/functions/NEOpticalFlow.cpp", "src/runtime/NEON/functions/NEPReluLayer.cpp", "src/runtime/NEON/functions/NEPadLayer.cpp", "src/runtime/NEON/functions/NEPermute.cpp", - "src/runtime/NEON/functions/NEPhase.cpp", "src/runtime/NEON/functions/NEPixelWiseMultiplication.cpp", "src/runtime/NEON/functions/NEPoolingLayer.cpp", "src/runtime/NEON/functions/NEPriorBoxLayer.cpp", @@ -754,30 +691,21 @@ cc_library_static { "src/runtime/NEON/functions/NERange.cpp", "src/runtime/NEON/functions/NEReduceMean.cpp", "src/runtime/NEON/functions/NEReductionOperation.cpp", - "src/runtime/NEON/functions/NERemap.cpp", "src/runtime/NEON/functions/NEReorgLayer.cpp", "src/runtime/NEON/functions/NEReshapeLayer.cpp", "src/runtime/NEON/functions/NEReverse.cpp", "src/runtime/NEON/functions/NEScale.cpp", - "src/runtime/NEON/functions/NEScharr3x3.cpp", "src/runtime/NEON/functions/NESelect.cpp", "src/runtime/NEON/functions/NESlice.cpp", - "src/runtime/NEON/functions/NESobel3x3.cpp", - "src/runtime/NEON/functions/NESobel5x5.cpp", - "src/runtime/NEON/functions/NESobel7x7.cpp", "src/runtime/NEON/functions/NESoftmaxLayer.cpp", "src/runtime/NEON/functions/NESpaceToBatchLayer.cpp", "src/runtime/NEON/functions/NESpaceToDepthLayer.cpp", "src/runtime/NEON/functions/NESplit.cpp", "src/runtime/NEON/functions/NEStackLayer.cpp", "src/runtime/NEON/functions/NEStridedSlice.cpp", - "src/runtime/NEON/functions/NETableLookup.cpp", - "src/runtime/NEON/functions/NEThreshold.cpp", "src/runtime/NEON/functions/NETile.cpp", "src/runtime/NEON/functions/NETranspose.cpp", "src/runtime/NEON/functions/NEUnstack.cpp", - "src/runtime/NEON/functions/NEWarpAffine.cpp", - "src/runtime/NEON/functions/NEWarpPerspective.cpp", "src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp", "src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp", "src/runtime/OMP/OMPScheduler.cpp", diff --git a/arm_compute/runtime/NEON/NEFunctions.h b/arm_compute/runtime/NEON/NEFunctions.h index e876e3916f..8b6649cc41 100644 --- a/arm_compute/runtime/NEON/NEFunctions.h +++ b/arm_compute/runtime/NEON/NEFunctions.h @@ -24,9 +24,6 @@ #ifndef ARM_COMPUTE_NEFUNCTIONS_H #define ARM_COMPUTE_NEFUNCTIONS_H -/* Header regrouping all the Neon functions */ -#include "arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h" -#include "arm_compute/runtime/NEON/functions/NEAccumulate.h" #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" #include "arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h" #include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" @@ -38,13 +35,8 @@ #include "arm_compute/runtime/NEON/functions/NEBitwiseOr.h" #include "arm_compute/runtime/NEON/functions/NEBitwiseXor.h" #include "arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h" -#include "arm_compute/runtime/NEON/functions/NEBox3x3.h" -#include "arm_compute/runtime/NEON/functions/NECannyEdge.h" #include "arm_compute/runtime/NEON/functions/NECast.h" -#include "arm_compute/runtime/NEON/functions/NEChannelCombine.h" -#include "arm_compute/runtime/NEON/functions/NEChannelExtract.h" #include "arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h" -#include "arm_compute/runtime/NEON/functions/NEColorConvert.h" #include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h" #include "arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h" #include "arm_compute/runtime/NEON/functions/NEConvolution.h" @@ -56,18 +48,13 @@ #include "arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h" #include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h" #include "arm_compute/runtime/NEON/functions/NEDequantizationLayer.h" -#include "arm_compute/runtime/NEON/functions/NEDerivative.h" #include "arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h" -#include "arm_compute/runtime/NEON/functions/NEDilate.h" #include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h" #include "arm_compute/runtime/NEON/functions/NEElementwiseOperations.h" #include "arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h" -#include "arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h" -#include "arm_compute/runtime/NEON/functions/NEErode.h" #include "arm_compute/runtime/NEON/functions/NEFFT1D.h" #include "arm_compute/runtime/NEON/functions/NEFFT2D.h" #include "arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h" -#include "arm_compute/runtime/NEON/functions/NEFastCorners.h" #include "arm_compute/runtime/NEON/functions/NEFill.h" #include "arm_compute/runtime/NEON/functions/NEFillBorder.h" #include "arm_compute/runtime/NEON/functions/NEFlattenLayer.h" @@ -80,38 +67,19 @@ #include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h" #include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h" #include "arm_compute/runtime/NEON/functions/NEGather.h" -#include "arm_compute/runtime/NEON/functions/NEGaussian3x3.h" -#include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h" -#include "arm_compute/runtime/NEON/functions/NEGaussianPyramid.h" #include "arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h" -#include "arm_compute/runtime/NEON/functions/NEHOGDescriptor.h" -#include "arm_compute/runtime/NEON/functions/NEHOGDetector.h" -#include "arm_compute/runtime/NEON/functions/NEHOGGradient.h" -#include "arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h" -#include "arm_compute/runtime/NEON/functions/NEHarrisCorners.h" -#include "arm_compute/runtime/NEON/functions/NEHistogram.h" #include "arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayer.h" -#include "arm_compute/runtime/NEON/functions/NEIntegralImage.h" #include "arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h" #include "arm_compute/runtime/NEON/functions/NELSTMLayer.h" #include "arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h" -#include "arm_compute/runtime/NEON/functions/NELaplacianPyramid.h" -#include "arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h" #include "arm_compute/runtime/NEON/functions/NELogical.h" -#include "arm_compute/runtime/NEON/functions/NEMagnitude.h" #include "arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h" -#include "arm_compute/runtime/NEON/functions/NEMeanStdDev.h" #include "arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h" -#include "arm_compute/runtime/NEON/functions/NEMedian3x3.h" -#include "arm_compute/runtime/NEON/functions/NEMinMaxLocation.h" -#include "arm_compute/runtime/NEON/functions/NENonLinearFilter.h" #include "arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h" #include "arm_compute/runtime/NEON/functions/NENormalizationLayer.h" -#include "arm_compute/runtime/NEON/functions/NEOpticalFlow.h" #include "arm_compute/runtime/NEON/functions/NEPReluLayer.h" #include "arm_compute/runtime/NEON/functions/NEPadLayer.h" #include "arm_compute/runtime/NEON/functions/NEPermute.h" -#include "arm_compute/runtime/NEON/functions/NEPhase.h" #include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h" #include "arm_compute/runtime/NEON/functions/NEPoolingLayer.h" #include "arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h" @@ -123,30 +91,21 @@ #include "arm_compute/runtime/NEON/functions/NERange.h" #include "arm_compute/runtime/NEON/functions/NEReduceMean.h" #include "arm_compute/runtime/NEON/functions/NEReductionOperation.h" -#include "arm_compute/runtime/NEON/functions/NERemap.h" #include "arm_compute/runtime/NEON/functions/NEReorgLayer.h" #include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h" #include "arm_compute/runtime/NEON/functions/NEReverse.h" #include "arm_compute/runtime/NEON/functions/NEScale.h" -#include "arm_compute/runtime/NEON/functions/NEScharr3x3.h" #include "arm_compute/runtime/NEON/functions/NESelect.h" #include "arm_compute/runtime/NEON/functions/NESlice.h" -#include "arm_compute/runtime/NEON/functions/NESobel3x3.h" -#include "arm_compute/runtime/NEON/functions/NESobel5x5.h" -#include "arm_compute/runtime/NEON/functions/NESobel7x7.h" #include "arm_compute/runtime/NEON/functions/NESoftmaxLayer.h" #include "arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h" #include "arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h" #include "arm_compute/runtime/NEON/functions/NESplit.h" #include "arm_compute/runtime/NEON/functions/NEStackLayer.h" #include "arm_compute/runtime/NEON/functions/NEStridedSlice.h" -#include "arm_compute/runtime/NEON/functions/NETableLookup.h" -#include "arm_compute/runtime/NEON/functions/NEThreshold.h" #include "arm_compute/runtime/NEON/functions/NETile.h" #include "arm_compute/runtime/NEON/functions/NETranspose.h" #include "arm_compute/runtime/NEON/functions/NEUnstack.h" -#include "arm_compute/runtime/NEON/functions/NEWarpAffine.h" -#include "arm_compute/runtime/NEON/functions/NEWarpPerspective.h" #include "arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h" #endif /* ARM_COMPUTE_NEFUNCTIONS_H */ diff --git a/arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h b/arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h deleted file mode 100644 index f00b144475..0000000000 --- a/arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEABSOLUTEDIFFERENCE_H -#define ARM_COMPUTE_NEABSOLUTEDIFFERENCE_H - -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -namespace arm_compute -{ -class ITensor; - -/** Basic function to run @ref NEAbsoluteDifferenceKernel - * - * @note The image data type for the inputs must be U8 or S16 - * @note The function calculates the absolute difference also when the 2 inputs have different image data types - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NEAbsoluteDifference : public INESimpleFunctionNoBorder -{ -public: - /** Default constructor */ - NEAbsoluteDifference() = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEAbsoluteDifference(const NEAbsoluteDifference &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEAbsoluteDifference &operator=(const NEAbsoluteDifference &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEAbsoluteDifference(NEAbsoluteDifference &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEAbsoluteDifference &operator=(NEAbsoluteDifference &&) = delete; - /** Default destructor */ - ~NEAbsoluteDifference(); - /** Set the inputs and output images - * - * @param[in] input1 Source tensor. Data types supported: U8/S16. - * @param[in] input2 Source tensor. Data types supported: U8/S16. - * @param[out] output Destination tensor. Data types supported: U8/S16. - */ - void configure(const ITensor *input1, const ITensor *input2, ITensor *output); -}; -} -#endif /* ARM_COMPUTE_NEABSOLUTEDIFFERENCE_H */ diff --git a/arm_compute/runtime/NEON/functions/NEAccumulate.h b/arm_compute/runtime/NEON/functions/NEAccumulate.h deleted file mode 100644 index 1881411880..0000000000 --- a/arm_compute/runtime/NEON/functions/NEAccumulate.h +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEACCUMULATE_H -#define ARM_COMPUTE_NEACCUMULATE_H - -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -#include - -namespace arm_compute -{ -class ITensor; - -/** Basic function to run @ref NEAccumulateKernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * -*/ -class NEAccumulate : public INESimpleFunctionNoBorder -{ -public: - /** Default constructor */ - NEAccumulate() = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEAccumulate(const NEAccumulate &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEAccumulate &operator=(const NEAccumulate &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEAccumulate(NEAccumulate &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEAccumulate &operator=(NEAccumulate &&) = delete; - /** Default destructor */ - ~NEAccumulate(); - /** Set the input and accumulation tensors - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] output Destination tensor. Data type supported: S16. - */ - void configure(const ITensor *input, ITensor *output); -}; - -/** Basic function to run @ref NEAccumulateWeightedKernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * -*/ -class NEAccumulateWeighted : public INESimpleFunctionNoBorder -{ -public: - /** Default constructor */ - NEAccumulateWeighted() = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEAccumulateWeighted(const NEAccumulateWeighted &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEAccumulateWeighted &operator=(const NEAccumulateWeighted &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEAccumulateWeighted(NEAccumulateWeighted &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEAccumulateWeighted &operator=(NEAccumulateWeighted &&) = delete; - /** Default destructor */ - ~NEAccumulateWeighted(); - /** Set the input and accumulation tensors, and the scale value - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[in] alpha The input scalar value with a value input the range of [0, 1.0] - * @param[in,out] output Accumulated tensor. Data type supported: U8. - * @param[in] use_fp16 (Optional) If true the FP16 kernels will be used. If false F32 kernels are used. - */ - void configure(const ITensor *input, float alpha, ITensor *output, bool use_fp16 = false); -}; - -/** Basic function to run @ref NEAccumulateSquaredKernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * -*/ -class NEAccumulateSquared : public INESimpleFunctionNoBorder -{ -public: - /** Default constructor */ - NEAccumulateSquared() = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEAccumulateSquared(const NEAccumulateSquared &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEAccumulateSquared &operator=(const NEAccumulateSquared &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEAccumulateSquared(NEAccumulateSquared &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEAccumulateSquared &operator=(NEAccumulateSquared &&) = delete; - /** Default destructor */ - ~NEAccumulateSquared(); - /** Set the input and accumulation tensors and the shift value. - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[in] shift The input with a value input the range of [0, 15] - * @param[in,out] output Accumulated tensor. Data type supported: S16. - */ - void configure(const ITensor *input, uint32_t shift, ITensor *output); -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEACCUMULATE_H */ diff --git a/arm_compute/runtime/NEON/functions/NEBox3x3.h b/arm_compute/runtime/NEON/functions/NEBox3x3.h deleted file mode 100644 index b45cf946e4..0000000000 --- a/arm_compute/runtime/NEON/functions/NEBox3x3.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2017-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEBOX3x3_H -#define ARM_COMPUTE_NEBOX3x3_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" - -#include - -namespace arm_compute -{ -class ITensor; - -/** Basic function to execute box filter 3x3. This function calls the following Neon kernels: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NEBox3x3Kernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NEBox3x3 : public INESimpleFunction -{ -public: - /** Initialise the function's input, output and border mode. - * - * @note The border handler is run on the input tensor. - * - * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[out] output Destination tensor, Data type supported: U8. - * @param[in] border_mode Strategy to use for borders. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - * @param[in] use_fp16 (Optional) If true the FP16 kernels will be used. If false F32 kernels are used. - */ - void configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value = 0, bool use_fp16 = false); -}; -} -#endif /*ARM_COMPUTE_NEBOX3x3_H */ diff --git a/arm_compute/runtime/NEON/functions/NECannyEdge.h b/arm_compute/runtime/NEON/functions/NECannyEdge.h deleted file mode 100644 index 2d8bac1db9..0000000000 --- a/arm_compute/runtime/NEON/functions/NECannyEdge.h +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2017-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NECANNYEDGE_H -#define ARM_COMPUTE_NECANNYEDGE_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/Tensor.h" - -#include -#include - -namespace arm_compute -{ -class ITensor; -class NEGradientKernel; -class NEFillBorderKernel; -class NEEdgeNonMaxSuppressionKernel; -class NEEdgeTraceKernel; - -/** Basic function to execute canny edge on Neon. This function calls the following Neon kernels and functions: - * - * -# @ref NEFillBorderKernel (if border_mode == REPLICATE or border_mode == CONSTANT) - * -# @ref NESobel3x3 (if gradient_size == 3) or - * @ref NESobel5x5 (if gradient_size == 5) or - * @ref NESobel7x7 (if gradient_size == 7) - * -# @ref NEGradientKernel - * -# @ref NEEdgeNonMaxSuppressionKernel - * -# @ref NEEdgeTraceKernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - * - */ -class NECannyEdge : public IFunction -{ -public: - /** Constructor - * - * Initialize Sobel kernel to nullptr. - * - * @param[in] memory_manager (Optional) Memory manager. - */ - NECannyEdge(std::shared_ptr memory_manager = nullptr); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NECannyEdge(const NECannyEdge &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NECannyEdge &operator=(const NECannyEdge &) = delete; - /** Default destructor */ - ~NECannyEdge(); - /** Initialise the function's source, destination, thresholds, gradient size, normalization type and border mode. - * - * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[out] output Destination tensor. Data type supported: U8. - * @param[in] upper_thr Upper threhold used for the hysteresis - * @param[in] lower_thr Lower threshold used for the hysteresis. - * @param[in] gradient_size Gradient size (3, 5 or 7) - * @param[in] norm_type Normalization type. If 1, L1-Norm otherwise L2-Norm - * @param[in] border_mode Border mode to use for the convolution. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - */ - void configure(ITensor *input, ITensor *output, int32_t upper_thr, int32_t lower_thr, int32_t gradient_size, int32_t norm_type, BorderMode border_mode, uint8_t constant_border_value = 0); - - // Inherited methods overridden: - void run() override; - -private: - MemoryGroup _memory_group; /**< Function's memory group */ - std::unique_ptr _sobel; /**< Pointer to Sobel kernel */ - std::unique_ptr _gradient; /**< Gradient kernel */ - std::unique_ptr _non_max_suppr; /**< Non-Maxima suppression kernel */ - std::unique_ptr _edge_trace; /**< Edge tracing kernel */ - std::unique_ptr _border_mag_gradient; /**< Fill border on magnitude tensor kernel */ - std::unique_ptr _border_edge_trace; /**< Fill border before edge trace */ - Tensor _gx; /**< Source tensor - Gx component */ - Tensor _gy; /**< Source tensor - Gy component */ - Tensor _magnitude; /**< Source tensor - Magnitude */ - Tensor _phase; /**< Source tensor - Phase */ - Tensor _nonmax; /**< Source tensor - Non-Maxima suppressed */ - ITensor *_output; /**< Output tensor provided by the user. */ -}; -} -#endif /* ARM_COMPUTE_NECANNYEDGE_H */ diff --git a/arm_compute/runtime/NEON/functions/NEChannelCombine.h b/arm_compute/runtime/NEON/functions/NEChannelCombine.h deleted file mode 100644 index c4ead73343..0000000000 --- a/arm_compute/runtime/NEON/functions/NEChannelCombine.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NECHANNELCOMBINE_H -#define ARM_COMPUTE_NECHANNELCOMBINE_H - -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -namespace arm_compute -{ -class IMultiImage; -class ITensor; -using IImage = ITensor; - -/**Basic function to run @ref NEChannelCombineKernel to perform channel combination. - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * -*/ -class NEChannelCombine : public INESimpleFunctionNoBorder -{ -public: - /** Initialize function's inputs and outputs. - * - * @param[in] plane0 The 2D plane that forms channel 0. Data type supported: U8 - * @param[in] plane1 The 2D plane that forms channel 1. Data type supported: U8 - * @param[in] plane2 The 2D plane that forms channel 2. Data type supported: U8 - * @param[in] plane3 The 2D plane that forms channel 3. Data type supported: U8 - * @param[out] output The single planar output tensor. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422 - */ - void configure(const ITensor *plane0, const ITensor *plane1, const ITensor *plane2, const ITensor *plane3, ITensor *output); - /** Initialize function's inputs and outputs. - * - * @param[in] plane0 The 2D plane that forms channel 0. Data type supported: U8 - * @param[in] plane1 The 2D plane that forms channel 1. Data type supported: U8 - * @param[in] plane2 The 2D plane that forms channel 2. Data type supported: U8 - * @param[out] output The multi planar output image. Formats supported: NV12/NV21/IYUV/YUV444 - */ - void configure(const IImage *plane0, const IImage *plane1, const IImage *plane2, IMultiImage *output); -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NECHANNELCOMBINE_H*/ diff --git a/arm_compute/runtime/NEON/functions/NEChannelExtract.h b/arm_compute/runtime/NEON/functions/NEChannelExtract.h deleted file mode 100644 index 99522d2d74..0000000000 --- a/arm_compute/runtime/NEON/functions/NEChannelExtract.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NECHANNELEXTRACT_H -#define ARM_COMPUTE_NECHANNELEXTRACT_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -namespace arm_compute -{ -class IMultiImage; -class ITensor; -using IImage = ITensor; - -/**Basic function to run @ref NEChannelExtractKernel to perform channel extraction. - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * -*/ -class NEChannelExtract : public INESimpleFunctionNoBorder -{ -public: - /** Initialize the function's source, destination - * - * @param[in] input The input tensor to extract the channel from. Formats supported: Formats supported: RGB888/RGBA8888/YUYV422/UYVY422 - * @param[in] channel The channel to extract. - * @param[out] output The extracted channel. Format supported: U8 - */ - void configure(const ITensor *input, Channel channel, ITensor *output); - /** Initialize the function's source, destination - * - * @param[in] input The multi-planar input image to extract channel from. Formats supported: NV12/NV21/IYUV/YUV444 - * @param[in] channel The channel to extract. - * @param[out] output The extracted channel. Format supported: U8 - */ - void configure(const IMultiImage *input, Channel channel, IImage *output); -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NECHANNELEXTRACT_H*/ diff --git a/arm_compute/runtime/NEON/functions/NEColorConvert.h b/arm_compute/runtime/NEON/functions/NEColorConvert.h deleted file mode 100644 index 8974aa63a1..0000000000 --- a/arm_compute/runtime/NEON/functions/NEColorConvert.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NECOLORCONVERT_H -#define ARM_COMPUTE_NECOLORCONVERT_H - -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -namespace arm_compute -{ -class ITensor; -class IMultiImage; -using IImage = ITensor; - -/**Basic function to run @ref NEColorConvertKernel to perform color conversion - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NEColorConvert : public INESimpleFunctionNoBorder -{ -public: - /** Initialize the function's source, destination - * - * @param[in] input Source tensor. Formats supported: RGBA8888/UYVY422/YUYV422/RGB888 - * @param[out] output Destination tensor. Formats supported: RGB888 (if the formats of @p input are RGBA8888/UYVY422/YUYV422), - * RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888/), - * U8 (if the formats of @p input is RGB888) - */ - void configure(const ITensor *input, ITensor *output); - /** Initialize the function's source, destination - * - * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV - * @param[out] output Single-planar destination image. Formats supported: RGB888/RGBA8888 - */ - void configure(const IMultiImage *input, IImage *output); - /** Initialize the function's source, destination - * - * @param[in] input Single-planar source image. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422 - * @param[out] output Multi-planar destination image. Formats supported: NV12/IYUV/YUV444 (if the formats of @p input are RGB888/RGB8888) - */ - void configure(const IImage *input, IMultiImage *output); - /** Initialize the function's source, destination - * - * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV - * @param[out] output Multi-planar destination image. Formats supported: YUV444/IYUV (if the formats of @p input are NV12/NV21)/NV12 (if the format of @p input is IYUV) - */ - void configure(const IMultiImage *input, IMultiImage *output); -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NECOLORCONVERT_H*/ diff --git a/arm_compute/runtime/NEON/functions/NEDerivative.h b/arm_compute/runtime/NEON/functions/NEDerivative.h deleted file mode 100644 index c1e110b55b..0000000000 --- a/arm_compute/runtime/NEON/functions/NEDerivative.h +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright (c) 2016-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEDERIVATIVE_H -#define ARM_COMPUTE_NEDERIVATIVE_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/IFunction.h" - -#include - -namespace arm_compute -{ -class ITensor; -class NEDerivativeKernel; -class NEFillBorderKernel; - -/** Basic function to execute first order derivative operator. This function calls the following Neon kernels: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NEDerivativeKernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NEDerivative : public IFunction -{ -public: - /** Default constructor */ - NEDerivative(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDerivative(const NEDerivative &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDerivative &operator=(const NEDerivative &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEDerivative(NEDerivative &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEDerivative &operator=(NEDerivative &&) = delete; - /** Default destructor */ - ~NEDerivative(); - /** Initialise the function's source, destinations and border mode. - * - * @note At least one of output_x or output_y must be not NULL. - * - * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[out] output_x (optional) Destination tensor. Derivative along the X direction. Data type supported: S16. - * @param[out] output_y (optional) Destination tensor. Derivative along the Y direction. Data type supported: S16. - * @param[in] border_mode Border mode to use - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - * - */ - void configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0); - - // Inherited methods overridden: - void run() override; - -private: - std::unique_ptr _kernel; /**< Derivative kernel */ - std::unique_ptr _border_handler; /**< Kernel to handle tensor borders */ -}; -} -#endif /* ARM_COMPUTE_NEDERIVATIVE_H */ diff --git a/arm_compute/runtime/NEON/functions/NEDilate.h b/arm_compute/runtime/NEON/functions/NEDilate.h deleted file mode 100644 index 6c119d0d75..0000000000 --- a/arm_compute/runtime/NEON/functions/NEDilate.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2016-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEDILATE_H -#define ARM_COMPUTE_NEDILATE_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" - -#include - -namespace arm_compute -{ -class ITensor; - -/** Basic function to execute dilate. This function calls the following Neon kernels: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NEDilateKernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NEDilate : public INESimpleFunction -{ -public: - /** Initialise the kernel's inputs, output and border mode. - * - * @param[in, out] input First tensor input. Data type supported: U8.(Written to only for @p border_mode != UNDEFINED) - * @param[out] output Output tensor. Data type supported: U8. - * @param[in] border_mode Border mode to use for the convolution. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - */ - void configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value = 0); -}; -} -#endif /*ARM_COMPUTE_NEDILATE_H */ diff --git a/arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h b/arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h deleted file mode 100644 index 5d50651810..0000000000 --- a/arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2016-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEEQUALIZEHISTOGRAM_H -#define ARM_COMPUTE_NEEQUALIZEHISTOGRAM_H - -#include "arm_compute/runtime/Distribution1D.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/Lut.h" - -#include - -namespace arm_compute -{ -class ITensor; -class NEHistogramKernel; -class NECumulativeDistributionKernel; -class NETableLookupKernel; -using IImage = ITensor; - -/** Basic function to execute histogram equalization. This function calls the following Neon kernels: - * - * -# @ref NEHistogramKernel - * -# @ref NECumulativeDistributionKernel - * -# @ref NETableLookupKernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NEEqualizeHistogram : public IFunction -{ -public: - /** Default Constructor. */ - NEEqualizeHistogram(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEEqualizeHistogram(const NEEqualizeHistogram &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEEqualizeHistogram &operator=(const NEEqualizeHistogram &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEEqualizeHistogram(NEEqualizeHistogram &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEEqualizeHistogram &operator=(NEEqualizeHistogram &&) = delete; - /** Default destructor */ - ~NEEqualizeHistogram(); - /** Initialise the kernel's inputs. - * - * @note Currently the width of the input image must be a multiple of 16. - * - * @param[in] input Input image. Data type supported: U8. - * @param[out] output Output image. Data type supported: same as @p input - */ - void configure(const IImage *input, IImage *output); - - // Inherited methods overridden: - void run() override; - -private: - std::unique_ptr _histogram_kernel; /**< Kernel that calculates the histogram of input. */ - std::unique_ptr _cd_histogram_kernel; /**< Kernel that calculates the cumulative distribution - and creates the relevant LookupTable. */ - std::unique_ptr _map_histogram_kernel; /**< Kernel that maps the input to output using the lut. */ - Distribution1D _hist; /**< Distribution that holds the histogram of the input image. */ - Distribution1D _cum_dist; /**< Distribution that holds the cummulative distribution of the input histogram. */ - Lut _cd_lut; /**< Holds the equalization lookuptable. */ - static constexpr uint32_t nr_bins{ 256 }; /**< Histogram bins of the internal histograms. */ - static constexpr uint32_t max_range{ nr_bins - 1 }; /**< Histogram range of the internal histograms. */ -}; -} -#endif /*ARM_COMPUTE_NEEQUALIZEHISTOGRAM_H */ diff --git a/arm_compute/runtime/NEON/functions/NEErode.h b/arm_compute/runtime/NEON/functions/NEErode.h deleted file mode 100644 index 461bc878c9..0000000000 --- a/arm_compute/runtime/NEON/functions/NEErode.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2016-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEERODE_H -#define ARM_COMPUTE_NEERODE_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" - -#include - -namespace arm_compute -{ -class ITensor; - -/** Basic function to execute erode. This function calls the following Neon kernels: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NEErodeKernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NEErode : public INESimpleFunction -{ -public: - /** Initialise the kernel's inputs, output and border mode - * - * @param[in, out] input First tensor input. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[out] output Output tensor. Data type supported: U8. - * @param[in] border_mode Border mode to use for the convolution. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - */ - void configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value = 0); -}; -} -#endif /*ARM_COMPUTE_NEERODE_H */ diff --git a/arm_compute/runtime/NEON/functions/NEFastCorners.h b/arm_compute/runtime/NEON/functions/NEFastCorners.h deleted file mode 100644 index c66aeedcca..0000000000 --- a/arm_compute/runtime/NEON/functions/NEFastCorners.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2016-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEFASTCORNERS_H -#define ARM_COMPUTE_NEFASTCORNERS_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/Array.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/Tensor.h" - -#include -#include - -namespace arm_compute -{ -class ITensor; -class NENonMaximaSuppression3x3Kernel; -class NEFastCornersKernel; -class NEFillBorderKernel; -class NEFillArrayKernel; -using IImage = ITensor; - -/** Basic function to execute fast corners. This function call the following Neon kernels: - * - * -# @ref NEFastCornersKernel - * -# @ref NENonMaximaSuppression3x3Kernel (executed if nonmax_suppression == true) - * -# @ref NEFillArrayKernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NEFastCorners : public IFunction -{ -public: - /** Constructor */ - NEFastCorners(std::shared_ptr memory_manager = nullptr); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFastCorners(const NEFastCorners &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFastCorners &operator=(const NEFastCorners &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEFastCorners(NEFastCorners &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEFastCorners &operator=(NEFastCorners &&) = delete; - /** Default destructor */ - ~NEFastCorners(); - /** Initialize the function's source, destination, conv and border_mode. - * - * @param[in, out] input Source image. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[in] threshold Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3. - * @param[in] nonmax_suppression If true, non-maximum suppression is applied to detected corners before being placed in the array. - * @param[out] corners Array of keypoints to store the results. - * @param[in] border_mode Strategy to use for borders. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - */ - void configure(IImage *input, float threshold, bool nonmax_suppression, KeyPointArray *corners, - BorderMode border_mode, uint8_t constant_border_value = 0); - - // Inherited methods overridden: - void run() override; - -private: - MemoryGroup _memory_group; - std::unique_ptr _fast_corners_kernel; - std::unique_ptr _border_handler; - std::unique_ptr _nonmax_kernel; - std::unique_ptr _fill_kernel; - Image _output; - Image _suppressed; - bool _non_max; -}; -} -#endif /*ARM_COMPUTE_NEFASTCORNERS_H */ diff --git a/arm_compute/runtime/NEON/functions/NEGaussian3x3.h b/arm_compute/runtime/NEON/functions/NEGaussian3x3.h deleted file mode 100644 index 8edf558db8..0000000000 --- a/arm_compute/runtime/NEON/functions/NEGaussian3x3.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2016-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEGAUSSIAN3x3_H -#define ARM_COMPUTE_NEGAUSSIAN3x3_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" - -#include - -namespace arm_compute -{ -class ITensor; - -/** Basic function to execute gaussian filter 3x3. This function calls the following Neon kernels: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NEGaussian3x3Kernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NEGaussian3x3 : public INESimpleFunction -{ -public: - /** Initialise the function's input, output and border mode. - * - * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[out] output Destination tensor, Data type supported: U8. - * @param[in] border_mode Strategy to use for borders. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - */ - void configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value = 0); -}; -} -#endif /*ARM_COMPUTE_NEGAUSSIAN3x3_H */ diff --git a/arm_compute/runtime/NEON/functions/NEGaussian5x5.h b/arm_compute/runtime/NEON/functions/NEGaussian5x5.h deleted file mode 100644 index 8ad9e9a7ed..0000000000 --- a/arm_compute/runtime/NEON/functions/NEGaussian5x5.h +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright (c) 2016-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEGAUSSIAN5x5_H -#define ARM_COMPUTE_NEGAUSSIAN5x5_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/Tensor.h" - -#include -#include - -namespace arm_compute -{ -class ITensor; -class NEGaussian5x5HorKernel; -class NEGaussian5x5VertKernel; -class NEFillBorderKernel; - -/** Basic function to execute gaussian filter 5x5. This function calls the following Neon kernels: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NEGaussian5x5HorKernel - * -# @ref NEGaussian5x5VertKernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NEGaussian5x5 : public IFunction -{ -public: - /** Default constructor - */ - NEGaussian5x5(std::shared_ptr memory_manager = nullptr); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGaussian5x5(const NEGaussian5x5 &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGaussian5x5 &operator=(const NEGaussian5x5 &) = delete; - /** Allow instances of this class to be moved */ - NEGaussian5x5(NEGaussian5x5 &&) = default; - /** Allow instances of this class to be moved */ - NEGaussian5x5 &operator=(NEGaussian5x5 &&) = default; - /** Default destructor */ - ~NEGaussian5x5(); - /** Initialise the function's input, output and border mode. - * - * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[out] output Destination tensor, Data type supported: U8. - * @param[in] border_mode Strategy to use for borders. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - */ - void configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value = 0); - - // Inherited methods overridden: - void run() override; - -protected: - MemoryGroup _memory_group; /**< Function memory group */ - std::unique_ptr _kernel_hor; /**< kernel for horizontal pass */ - std::unique_ptr _kernel_vert; /**< kernel for vertical pass */ - Tensor _tmp; /**< temporary buffer for output of horizontal pass */ - std::unique_ptr _border_handler; /**< kernel to handle tensor borders */ -}; -} -#endif /*ARM_COMPUTE_NEGAUSSIAN5x5_H */ diff --git a/arm_compute/runtime/NEON/functions/NEGaussianPyramid.h b/arm_compute/runtime/NEON/functions/NEGaussianPyramid.h deleted file mode 100644 index 76a277782d..0000000000 --- a/arm_compute/runtime/NEON/functions/NEGaussianPyramid.h +++ /dev/null @@ -1,154 +0,0 @@ -/* - * Copyright (c) 2016-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEGAUSSIANPYRAMID_H -#define ARM_COMPUTE_NEGAUSSIANPYRAMID_H - -#include "arm_compute/core/IPyramid.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h" -#include "arm_compute/runtime/NEON/functions/NEScale.h" -#include "arm_compute/runtime/Pyramid.h" -#include "arm_compute/runtime/Tensor.h" - -#include -#include - -namespace arm_compute -{ -class ITensor; -class NEGaussianPyramidHorKernel; -class NEGaussianPyramidVertKernel; -class NEFillBorderKernel; - -/** Common interface for all Gaussian pyramid functions - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NEGaussianPyramid : public IFunction -{ -public: - /** Default constructor */ - NEGaussianPyramid(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGaussianPyramid(const NEGaussianPyramid &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGaussianPyramid &operator=(const NEGaussianPyramid &) = delete; - /** Allow instances of this class to be moved */ - NEGaussianPyramid(NEGaussianPyramid &&) = default; - /** Allow instances of this class to be moved */ - NEGaussianPyramid &operator=(NEGaussianPyramid &&) = default; - /** Default destructor */ - virtual ~NEGaussianPyramid() = default; - - /** Initialise the function's source, destinations and border mode. - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] pyramid Destination pyramid tensors, Data type supported at each level: U8. - * @param[in] border_mode Border mode to use. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - * - */ - virtual void configure(const ITensor *input, IPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) = 0; - -protected: - const ITensor *_input; - IPyramid *_pyramid; - Pyramid _tmp; -}; - -/** Basic function to execute gaussian pyramid with HALF scale factor. This function calls the following Neon kernels: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NEGaussianPyramidHorKernel - * -# @ref NEGaussianPyramidVertKernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - * - */ -class NEGaussianPyramidHalf : public NEGaussianPyramid -{ -public: - /** Constructor */ - NEGaussianPyramidHalf(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGaussianPyramidHalf(const NEGaussianPyramidHalf &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGaussianPyramidHalf &operator=(const NEGaussianPyramidHalf &) = delete; - /** Allow instances of this class to be moved */ - NEGaussianPyramidHalf(NEGaussianPyramidHalf &&) = default; - /** Allow instances of this class to be moved */ - NEGaussianPyramidHalf &operator=(NEGaussianPyramidHalf &&) = default; - /** Default destructor */ - ~NEGaussianPyramidHalf(); - - // Inherited methods overridden: - void configure(const ITensor *input, IPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override; - void run() override; - -private: - std::vector> _horizontal_border_handler; - std::vector> _vertical_border_handler; - std::vector> _horizontal_reduction; - std::vector> _vertical_reduction; -}; - -/** Basic function to execute gaussian pyramid with ORB scale factor. This function calls the following Neon kernels and functions: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NEGaussian5x5 - * -# @ref NEScaleKernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - * - */ -class NEGaussianPyramidOrb : public NEGaussianPyramid -{ -public: - /** Constructor */ - NEGaussianPyramidOrb(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGaussianPyramidOrb(const NEGaussianPyramidOrb &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGaussianPyramidOrb &operator=(const NEGaussianPyramidOrb &) = delete; - /** Allow instances of this class to be moved */ - NEGaussianPyramidOrb(NEGaussianPyramidOrb &&) = default; - /** Allow instances of this class to be moved */ - NEGaussianPyramidOrb &operator=(NEGaussianPyramidOrb &&) = default; - /** Default destructor */ - ~NEGaussianPyramidOrb(); - - // Inherited methods overridden: - void configure(const ITensor *input, IPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override; - void run() override; - -private: - std::vector _gaus5x5; - std::vector _scale_nearest; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEGAUSSIANPYRAMID_H */ diff --git a/arm_compute/runtime/NEON/functions/NEHOGDescriptor.h b/arm_compute/runtime/NEON/functions/NEHOGDescriptor.h deleted file mode 100644 index 4f8d8a5f61..0000000000 --- a/arm_compute/runtime/NEON/functions/NEHOGDescriptor.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2016-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEHOGDESCRIPTOR_H -#define ARM_COMPUTE_NEHOGDESCRIPTOR_H - -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NEHOGGradient.h" -#include "arm_compute/runtime/Tensor.h" - -#include - -namespace arm_compute -{ -class IHOG; -class NEHOGOrientationBinningKernel; -class NEHOGBlockNormalizationKernel; - -/** Basic function to calculate HOG descriptor. This function calls the following Neon kernels: - * - * -# @ref NEHOGGradient - * -# @ref NEHOGOrientationBinningKernel - * -# @ref NEHOGBlockNormalizationKernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NEHOGDescriptor : public IFunction -{ -public: - /** Default constructor */ - NEHOGDescriptor(std::shared_ptr memory_manager = nullptr); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHOGDescriptor(const NEHOGDescriptor &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHOGDescriptor &operator=(const NEHOGDescriptor &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEHOGDescriptor(NEHOGDescriptor &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEHOGDescriptor &operator=(NEHOGDescriptor &&) = delete; - /** Default destructor */ - ~NEHOGDescriptor(); - /** Initialise the function's source, destination, HOG data-object and border mode - * - * @param[in, out] input Input tensor. Data type supported: U8 - * (Written to only for @p border_mode != UNDEFINED) - * @param[out] output Output tensor which stores the HOG descriptor. DataType supported: F32. The number of channels is equal to the number of histogram bins per block - * @param[in] hog HOG data object which describes the HOG descriptor - * @param[in] border_mode Border mode to use. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - */ - void configure(ITensor *input, ITensor *output, const IHOG *hog, BorderMode border_mode, uint8_t constant_border_value = 0); - - // Inherited method overridden: - void run() override; - -private: - MemoryGroup _memory_group; - NEHOGGradient _gradient; - std::unique_ptr _orient_bin; - std::unique_ptr _block_norm; - Tensor _mag; - Tensor _phase; - Tensor _hog_space; -}; -} - -#endif /* ARM_COMPUTE_NEHOGDESCRIPTOR_H */ diff --git a/arm_compute/runtime/NEON/functions/NEHOGDetector.h b/arm_compute/runtime/NEON/functions/NEHOGDetector.h deleted file mode 100644 index aea1d9f741..0000000000 --- a/arm_compute/runtime/NEON/functions/NEHOGDetector.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2016-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEHOGDETECTOR_H -#define ARM_COMPUTE_NEHOGDETECTOR_H - -#include "arm_compute/core/IArray.h" -#include "arm_compute/core/IHOG.h" -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -namespace arm_compute -{ -class ITensor; -class ITensorInfo; -/** Basic function to execute HOG detector based on linear SVM. This function calls the following Neon kernel: - * - * -# @ref NEHOGDetectorKernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NEHOGDetector : public INESimpleFunctionNoBorder -{ -public: - /** Constructor */ - NEHOGDetector() = default; - /** Prevent instances of this class from being copied */ - NEHOGDetector(const NEHOGDetector &) = delete; - /** Default move constructor */ - NEHOGDetector(NEHOGDetector &&) = default; - /** Prevent instances of this class from being copied */ - NEHOGDetector &operator=(const NEHOGDetector &) = delete; - /** Default move assignment operator */ - NEHOGDetector &operator=(NEHOGDetector &&) = default; - /** Destructor */ - ~NEHOGDetector(); - /** Initialise the kernel's input, output, HOG data object, detection window stride, threshold and index class - * - * @attention The function does not reset the number of values in @ref IDetectionWindowArray so it is caller's responsibility to clear it. - * - * @param[in] input Input tensor. It is the output of @ref NEHOGDescriptor. Data type supported: F32 - * @param[in] hog HOG data-object that describes the HOG descriptor - * @param[out] detection_windows Array of @ref DetectionWindow used to store the detected objects - * @param[in] detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions. - * It must be multiple of the block stride stored in hog - * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane - * @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to - */ - void configure(const ITensor *input, const IHOG *hog, IDetectionWindowArray *detection_windows, const Size2D &detection_window_stride, float threshold = 0.0f, size_t idx_class = 0); -}; -} // namespace arm_compute - -#endif /* ARM_COMPUTE_NEHOGDETECTOR_H */ diff --git a/arm_compute/runtime/NEON/functions/NEHOGGradient.h b/arm_compute/runtime/NEON/functions/NEHOGGradient.h deleted file mode 100644 index 16c1d24bca..0000000000 --- a/arm_compute/runtime/NEON/functions/NEHOGGradient.h +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright (c) 2016-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEHOGGRADIENT_H -#define ARM_COMPUTE_NEHOGGRADIENT_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NEDerivative.h" -#include "arm_compute/runtime/Tensor.h" - -#include -#include - -namespace arm_compute -{ -class ITensor; -class ICPPKernel; - -/** Basic function to calculate the gradient for HOG. This function calls the following Neon kernels: - * - * -# @ref NEDerivative - * -# NEMagnitudePhaseKernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NEHOGGradient : public IFunction -{ -public: - /** Default constructor */ - NEHOGGradient(std::shared_ptr memory_manager = nullptr); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHOGGradient(const NEHOGGradient &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHOGGradient &operator=(const NEHOGGradient &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEHOGGradient(NEHOGGradient &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEHOGGradient &operator=(NEHOGGradient &&) = delete; - /** Default destructor */ - ~NEHOGGradient(); - /** Initialise the function's source, destinations, phase type and border mode - * - * @param[in, out] input Input tensor. Data type supported: U8. - * (Written to only for @p border_mode != UNDEFINED) - * @param[out] output_magnitude Output tensor (magnitude). Data type supported: U16. - * @param[out] output_phase Output tensor.(phase). Format supported: U8 - * @param[in] phase_type Type of @ref PhaseType - * @param[in] border_mode Border mode to use - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - */ - void configure(ITensor *input, ITensor *output_magnitude, ITensor *output_phase, PhaseType phase_type, BorderMode border_mode, uint8_t constant_border_value = 0); - - // Inherited method overridden: - void run() override; - -private: - MemoryGroup _memory_group; - NEDerivative _derivative; - std::unique_ptr _mag_phase; - Tensor _gx; - Tensor _gy; -}; -} -#endif /*ARM_COMPUTE_NEHOGGRADIENT_H */ diff --git a/arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h b/arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h deleted file mode 100644 index 5b54d709e2..0000000000 --- a/arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Copyright (c) 2016-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEHOGMULTIDETECTION_H -#define ARM_COMPUTE_NEHOGMULTIDETECTION_H - -#include "arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h" -#include "arm_compute/core/IArray.h" -#include "arm_compute/core/IMultiHOG.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NEHOGDetector.h" -#include "arm_compute/runtime/NEON/functions/NEHOGGradient.h" -#include "arm_compute/runtime/Tensor.h" - -#include - -namespace arm_compute -{ -class NEHOGOrientationBinningKernel; -class NEHOGBlockNormalizationKernel; - -/** Basic function to detect multiple objects (or the same object at different scales) on the same input image using HOG. This function calls the following Neon kernels: - * - * -# @ref NEHOGGradient - * -# @ref NEHOGOrientationBinningKernel - * -# @ref NEHOGBlockNormalizationKernel - * -# @ref NEHOGDetector - * -# @ref CPPDetectionWindowNonMaximaSuppressionKernel (executed if non_maxima_suppression == true) - * - * @note This implementation works if all the HOG data-objects within the IMultiHOG container have the same: - * -# Phase type - -# Normalization type - -# L2 hysteresis threshold if the normalization type is L2HYS_NORM - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NEHOGMultiDetection : public IFunction -{ -public: - /** Default constructor */ - NEHOGMultiDetection(std::shared_ptr memory_manager = nullptr); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHOGMultiDetection(const NEHOGMultiDetection &) = delete; - /** Prevent instances of this class from being moved (As this class contains pointers) */ - NEHOGMultiDetection(NEHOGMultiDetection &&) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHOGMultiDetection &operator=(const NEHOGMultiDetection &) = delete; - /** Prevent instances of this class from being moved (As this class contains pointers) */ - NEHOGMultiDetection &operator=(NEHOGMultiDetection &&) = delete; - /** Default destructor */ - ~NEHOGMultiDetection(); - /** Initialise the function's source, destination, detection window strides, border mode, threshold and non-maxima suppression - * - * @param[in, out] input Input tensor. Data type supported: U8 - * (Written to only for @p border_mode != UNDEFINED) - * @param[in] multi_hog Container of multiple HOG data object. Each HOG data object describes one HOG model to detect. - * This container should store the HOG data-objects in descending or ascending cell_size width order. - * This will help to understand if the HOG descriptor computation can be skipped for some HOG data-objects - * @param[out] detection_windows Array of @ref DetectionWindow used for locating the detected objects - * @param[in] detection_window_strides Array of @ref Size2D used to specify the distance in pixels between 2 consecutive detection windows in x and y directions for each HOG data-object - * The dimension of this array must be the same of multi_hog->num_models() - * The i-th detection_window_stride of this array must be multiple of the block_stride stored in the i-th multi_hog array - * @param[in] border_mode Border mode to use. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane - * @param[in] non_maxima_suppression (Optional) Flag to specify whether the non-maxima suppression is required or not. - * True if the non-maxima suppression stage has to be computed - * @param[in] min_distance (Optional) Radial Euclidean distance to use for the non-maxima suppression stage - * - */ - void configure(ITensor *input, const IMultiHOG *multi_hog, IDetectionWindowArray *detection_windows, const ISize2DArray *detection_window_strides, BorderMode border_mode, - uint8_t constant_border_value = 0, - float threshold = 0.0f, bool non_maxima_suppression = false, float min_distance = 1.0f); - - // Inherited method overridden: - void run() override; - -private: - MemoryGroup _memory_group; - NEHOGGradient _gradient_kernel; - std::vector _orient_bin_kernel; - std::vector _block_norm_kernel; - std::vector _hog_detect_kernel; - CPPDetectionWindowNonMaximaSuppressionKernel _non_maxima_kernel; - std::vector _hog_space; - std::vector _hog_norm_space; - IDetectionWindowArray *_detection_windows; - Tensor _mag; - Tensor _phase; - bool _non_maxima_suppression; - size_t _num_orient_bin_kernel; - size_t _num_block_norm_kernel; - size_t _num_hog_detect_kernel; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEHOGMULTIDETECTION_H */ diff --git a/arm_compute/runtime/NEON/functions/NEHarrisCorners.h b/arm_compute/runtime/NEON/functions/NEHarrisCorners.h deleted file mode 100644 index 424d085737..0000000000 --- a/arm_compute/runtime/NEON/functions/NEHarrisCorners.h +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Copyright (c) 2016-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEHARRISCORNERS_H -#define ARM_COMPUTE_NEHARRISCORNERS_H - -#include "arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h" -#include "arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/Array.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h" -#include "arm_compute/runtime/Tensor.h" - -#include -#include - -namespace arm_compute -{ -class ITensor; -class NEFillBorderKernel; -class INEHarrisScoreKernel; -using IImage = ITensor; - -/** Basic function to execute harris corners detection. This function calls the following Neon kernels and functions: - * - * -# @ref NESobel3x3 (if gradient_size == 3) or
- * @ref NESobel5x5 (if gradient_size == 5) or
- * @ref NESobel7x7 (if gradient_size == 7) - * -# @ref NEFillBorderKernel - * -# NEHarrisScoreKernel<3> (if block_size == 3) or
- * NEHarrisScoreKernel<5> (if block_size == 5) or
- * NEHarrisScoreKernel<7> (if block_size == 7) - * -# @ref NENonMaximaSuppression3x3 - * -# @ref CPPCornerCandidatesKernel - * -# @ref CPPSortEuclideanDistanceKernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NEHarrisCorners : public IFunction -{ -public: - /** Constructor - * - * Initialize _sobel, _harris_score and _corner_list to nullptr. - * - * @param[in] memory_manager (Optional) Memory manager. - */ - NEHarrisCorners(std::shared_ptr memory_manager = nullptr); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHarrisCorners(const NEHarrisCorners &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHarrisCorners &operator=(const NEHarrisCorners &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEHarrisCorners(NEHarrisCorners &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEHarrisCorners &operator=(NEHarrisCorners &&) = delete; - /** Default destructor */ - ~NEHarrisCorners(); - /** Initialize the function's source, destination, conv and border_mode. - * - * @param[in, out] input Source image. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[in] threshold Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel). - * @param[in] min_dist Radial Euclidean distance for the euclidean diatance stage - * @param[in] sensitivity Sensitivity threshold k from the Harris-Stephens equation - * @param[in] gradient_size The gradient window size to use on the input. The implementation supports 3, 5, and 7 - * @param[in] block_size The block window size used to compute the Harris Corner score. The implementation supports 3, 5, and 7. - * @param[out] corners Array of keypoints to store the results. - * @param[in] border_mode Border mode to use - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - */ - void configure(IImage *input, float threshold, float min_dist, float sensitivity, - int32_t gradient_size, int32_t block_size, KeyPointArray *corners, - BorderMode border_mode, uint8_t constant_border_value = 0); - - // Inherited methods overridden: - void run() override; - -private: - MemoryGroup _memory_group; /**< Function's memory group */ - std::unique_ptr _sobel; /**< Sobel function */ - std::unique_ptr _harris_score; /**< Harris score kernel */ - NENonMaximaSuppression3x3 _non_max_suppr; /**< Non-maxima suppression function */ - CPPCornerCandidatesKernel _candidates; /**< Sort kernel */ - CPPSortEuclideanDistanceKernel _sort_euclidean; /**< Euclidean distance kernel */ - std::unique_ptr _border_gx; /**< Border handler before running harris score */ - std::unique_ptr _border_gy; /**< Border handler before running harris score */ - Image _gx; /**< Source image - Gx component */ - Image _gy; /**< Source image - Gy component */ - Image _score; /**< Source image - Harris score */ - Image _nonmax; /**< Source image - Non-Maxima suppressed image */ - std::vector _corners_list; /**< Array of InternalKeypoint. It stores the potential corner candidates */ - int32_t _num_corner_candidates; /**< Number of potential corner candidates */ -}; -} -#endif /*ARM_COMPUTE_NEHARRISCORNERS_H */ diff --git a/arm_compute/runtime/NEON/functions/NEHistogram.h b/arm_compute/runtime/NEON/functions/NEHistogram.h deleted file mode 100644 index d922ef1214..0000000000 --- a/arm_compute/runtime/NEON/functions/NEHistogram.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEHISTOGRAM_H -#define ARM_COMPUTE_NEHISTOGRAM_H - -#include "arm_compute/runtime/IFunction.h" - -#include -#include -#include -#include - -namespace arm_compute -{ -class ITensor; -class IDistribution1D; -class NEHistogramKernel; -using IImage = ITensor; - -/** Basic function to run @ref NEHistogramKernel. - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NEHistogram : public IFunction -{ -public: - /** Default Constructor. */ - NEHistogram(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHistogram(const NEHistogram &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHistogram &operator=(const NEHistogram &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEHistogram(NEHistogram &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEHistogram &operator=(NEHistogram &&) = delete; - /** Default destructor */ - ~NEHistogram(); - /** Initialise the kernel's inputs. - * - * @param[in] input Input image. Data type supported: U8. - * @param[out] output Output distribution. - */ - void configure(const IImage *input, IDistribution1D *output); - - // Inherited methods overridden: - void run() override; - -private: - std::unique_ptr _histogram_kernel; - std::vector _local_hist; - std::vector _window_lut; - size_t _local_hist_size; - /** 256 possible pixel values as we handle only U8 images */ - static constexpr unsigned int window_lut_default_size = 256; -}; -} -#endif /*ARM_COMPUTE_NEHISTOGRAM_H */ diff --git a/arm_compute/runtime/NEON/functions/NEIntegralImage.h b/arm_compute/runtime/NEON/functions/NEIntegralImage.h deleted file mode 100644 index 31c0ec9ebe..0000000000 --- a/arm_compute/runtime/NEON/functions/NEIntegralImage.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEINTEGRALIMAGE_H -#define ARM_COMPUTE_NEINTEGRALIMAGE_H - -#include "arm_compute/runtime/NEON/INESimpleFunction.h" - -namespace arm_compute -{ -class ITensor; - -/** Basic function to run a @ref NEIntegralImageKernel - * -* @deprecated This function is deprecated and is intended to be removed in 21.05 release -* -*/ -class NEIntegralImage : public INESimpleFunction -{ -public: - /** Constructor */ - NEIntegralImage() = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEIntegralImage(const NEIntegralImage &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEIntegralImage &operator=(const NEIntegralImage &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEIntegralImage(NEIntegralImage &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEIntegralImage &operator=(NEIntegralImage &&) = delete; - /** Default destructor */ - ~NEIntegralImage(); - /** Initialise the function's source, destinations and border mode. - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] output Destination tensor. Data type supported: U32. - */ - void configure(const ITensor *input, ITensor *output); -}; -} -#endif /*ARM_COMPUTE_NEINTEGRALIMAGE_H */ diff --git a/arm_compute/runtime/NEON/functions/NELaplacianPyramid.h b/arm_compute/runtime/NEON/functions/NELaplacianPyramid.h deleted file mode 100644 index 7c8f378d82..0000000000 --- a/arm_compute/runtime/NEON/functions/NELaplacianPyramid.h +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (c) 2016-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NELAPLACIANPYRAMID_H -#define ARM_COMPUTE_NELAPLACIANPYRAMID_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h" -#include "arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h" -#include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h" -#include "arm_compute/runtime/NEON/functions/NEGaussianPyramid.h" -#include "arm_compute/runtime/Pyramid.h" - -#include -#include -#include - -namespace arm_compute -{ -class ITensor; - -/** Basic function to execute laplacian pyramid. This function calls the following Neon kernels and functions: - * - * -# @ref NEGaussianPyramidHalf - * -# @ref NEGaussian5x5 - * -# @ref NEArithmeticSubtraction - * - * First a Gaussian pyramid is created. Then, for each level i, the corresponding tensor I(i) is blurred with the Gaussian 5x5 filter, and then - * difference between the two tensors is the corresponding level L(i) of the Laplacian pyramid. - * L(i) = I(i) - Gaussian5x5(I(i)) - * Level 0 has always the same first two dimensions as the input tensor. - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * -*/ -class NELaplacianPyramid : public IFunction -{ -public: - /** Constructor */ - NELaplacianPyramid(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NELaplacianPyramid(const NELaplacianPyramid &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NELaplacianPyramid &operator=(const NELaplacianPyramid &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NELaplacianPyramid(NELaplacianPyramid &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NELaplacianPyramid &operator=(NELaplacianPyramid &&) = delete; - /** Default destructor */ - ~NELaplacianPyramid(); - /** Initialise the function's source, destinations and border mode. - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] pyramid Destination pyramid tensors, Data type supported at each level: S16. - * @param[out] output The lowest resolution tensor necessary to reconstruct the input tensor from the pyramid. Data type supported: S16. - * The first two dimensions of this tensor must match the first two dimensions of the tensor in the last level of the pyramid, that is: - * out.width = in.width() / pow(2,pyramid_levels-1) and out.height = in.height() / pow(2,pyramid_levels-1) - * @param[in] border_mode Border mode to use. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - * - */ - void configure(const ITensor *input, IPyramid *pyramid, ITensor *output, BorderMode border_mode, uint8_t constant_border_value); - - // Inherited methods overridden: - void run() override; - -private: - size_t _num_levels; - NEGaussianPyramidHalf _gaussian_pyr_function; - std::vector _convf; - std::vector _subf; - Pyramid _gauss_pyr; - Pyramid _conv_pyr; - NEDepthConvertLayer _depth_function; -}; -} -#endif /*ARM_COMPUTE_NELAPLACIANPYRAMID_H */ diff --git a/arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h b/arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h deleted file mode 100644 index 9397dd1828..0000000000 --- a/arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Copyright (c) 2016-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NELAPLACIANRECONSTRUCT_H -#define ARM_COMPUTE_NELAPLACIANRECONSTRUCT_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" -#include "arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h" -#include "arm_compute/runtime/NEON/functions/NEScale.h" -#include "arm_compute/runtime/Pyramid.h" - -#include -#include - -namespace arm_compute -{ -class ITensor; -using IImage = ITensor; - -/** Basic function to execute laplacian reconstruction. This function calls the following Neon kernels and functions: - * - * -# @ref NEArithmeticAddition - * -# @ref NEScale - * -# @ref NEDepthConvertLayer - * - * This function reconstructs the original image from a Laplacian Image Pyramid. - * - * The input image is added to the last level of the Laplacian pyramid L(n-2), the resulting image is upsampled to the - * resolution of the next pyramid level. - * - * I(n-2) = upsample( input + L(n-1) - * - * For each pyramid level i, except i=0 and i=n-1: - * I(i-1) = upsample(I(i) + L(i)) - * - * output = I(0) + L(0) - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * -*/ -class NELaplacianReconstruct : public IFunction -{ -public: - /** Constructor */ - NELaplacianReconstruct(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NELaplacianReconstruct(const NELaplacianReconstruct &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NELaplacianReconstruct &operator=(const NELaplacianReconstruct &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NELaplacianReconstruct(NELaplacianReconstruct &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NELaplacianReconstruct &operator=(NELaplacianReconstruct &&) = delete; - /** Default destructor */ - ~NELaplacianReconstruct(); - /** Initialise the function's source, destinations and border mode. - * - * The Output image must have the same size as the first level of the pyramid. - * The Input image must have the same size as the last level of the pyramid. - * - * The idea is to reconstuct the original hi-res image from a low-res representation of it and the laplacian pyramid. - * - * @param[in] pyramid Laplacian pyramid tensors, Data type supported at each level: S16. - * @param[in] input Source tensor. Data type supported: S16. - * @param[out] output Output tensor. Data type supported: U8. - * @param[in] border_mode Border mode to use for the convolution. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - * - */ - void configure(const IPyramid *pyramid, ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value); - - // Inherited methods overridden: - void run() override; - -private: - Pyramid _tmp_pyr; - std::vector _addf; - std::vector _scalef; - NEDepthConvertLayer _depthf; -}; -} -#endif /*ARM_COMPUTE_NELAPLACIANRECONSTRUCT_H */ diff --git a/arm_compute/runtime/NEON/functions/NEMagnitude.h b/arm_compute/runtime/NEON/functions/NEMagnitude.h deleted file mode 100644 index e100de2e08..0000000000 --- a/arm_compute/runtime/NEON/functions/NEMagnitude.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEMAGNITUDE_H -#define ARM_COMPUTE_NEMAGNITUDE_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -namespace arm_compute -{ -class ITensor; - -/** Basic function to run @ref NEMagnitudePhaseKernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NEMagnitude : public INESimpleFunctionNoBorder -{ -public: - /** Constructor */ - NEMagnitude() = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMagnitude(const NEMagnitude &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMagnitude &operator=(const NEMagnitude &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEMagnitude(NEMagnitude &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEMagnitude &operator=(NEMagnitude &&) = delete; - /** Default destructor */ - ~NEMagnitude(); - /** Initialise the kernel's inputs. - * - * @param[in] input1 First tensor input. Data type supported: S16. - * @param[in] input2 Second tensor input. Data type supported: S16. - * @param[out] output Output tensor. Data type supported: S16. - * @param[in] mag_type (Optional) Magnitude calculation type. Default: L2NORM. - */ - void configure(const ITensor *input1, const ITensor *input2, ITensor *output, MagnitudeType mag_type = MagnitudeType::L2NORM); -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEMAGNITUDE_H */ diff --git a/arm_compute/runtime/NEON/functions/NEMeanStdDev.h b/arm_compute/runtime/NEON/functions/NEMeanStdDev.h deleted file mode 100644 index ce7bb8c5ee..0000000000 --- a/arm_compute/runtime/NEON/functions/NEMeanStdDev.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2016-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEMEANSTDDEV_H -#define ARM_COMPUTE_NEMEANSTDDEV_H - -#include "arm_compute/core/IMultiImage.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/IFunction.h" -#include - -#include - -namespace arm_compute -{ -class NEMeanStdDevKernel; -class NEFillBorderKernel; - -/** Basic function to execute mean and std deviation. This function calls the following Neon kernels: - * - * @ref NEMeanStdDevKernel - * - */ -class NEMeanStdDev : public IFunction -{ -public: - /** Default Constructor. */ - NEMeanStdDev(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMeanStdDev(const NEMeanStdDev &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMeanStdDev &operator=(const NEMeanStdDev &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEMeanStdDev(NEMeanStdDev &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEMeanStdDev &operator=(NEMeanStdDev &&) = delete; - /** Default destructor */ - ~NEMeanStdDev(); - /** Initialise the kernel's inputs and outputs. - * - * @param[in, out] input Input image. Data types supported: U8. (Written to only for border filling) - * @param[out] mean Output average pixel value. - * @param[out] stddev (Optional) Output standard deviation of pixel values. - */ - void configure(IImage *input, float *mean, float *stddev = nullptr); - - // Inherited methods overridden: - void run() override; - -private: - std::unique_ptr _mean_stddev_kernel; /**< Kernel that standard deviation calculation. */ - std::unique_ptr _fill_border_kernel; /**< Kernel that fills tensor's borders with zeroes. */ - uint64_t _global_sum; /**< Variable that holds the global sum among calls in order to ease reduction */ - uint64_t _global_sum_squared; /**< Variable that holds the global sum of squared values among calls in order to ease reduction */ -}; -} -#endif /*ARM_COMPUTE_NEMEANSTDDEV_H */ diff --git a/arm_compute/runtime/NEON/functions/NEMedian3x3.h b/arm_compute/runtime/NEON/functions/NEMedian3x3.h deleted file mode 100644 index 90ee9ae5ef..0000000000 --- a/arm_compute/runtime/NEON/functions/NEMedian3x3.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (c) 2016-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEMEDIAN3x3_H -#define ARM_COMPUTE_NEMEDIAN3x3_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" - -#include - -namespace arm_compute -{ -class ITensor; - -/** Basic function to execute median filter. This function calls the following Neon kernels: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NEMedian3x3Kernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NEMedian3x3 : public INESimpleFunction -{ -public: - /** Initialise the function's source, destinations and border mode. - * - * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[out] output Destination tensor, Data type supported: U8. - * @param[in] border_mode Border mode to use for the convolution. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - * - */ - void configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value = 0); -}; -} -#endif /*ARM_COMPUTE_NEMEDIAN3x3_H */ diff --git a/arm_compute/runtime/NEON/functions/NEMinMaxLocation.h b/arm_compute/runtime/NEON/functions/NEMinMaxLocation.h deleted file mode 100644 index e1f765ebd4..0000000000 --- a/arm_compute/runtime/NEON/functions/NEMinMaxLocation.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2016-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEMINMAXLOCATION_H -#define ARM_COMPUTE_NEMINMAXLOCATION_H - -#include "arm_compute/core/IArray.h" -#include "arm_compute/runtime/Array.h" -#include "arm_compute/runtime/IFunction.h" - -#include -#include - -namespace arm_compute -{ -class ITensor; -class NEMinMaxKernel; -class NEMinMaxLocationKernel; -using IImage = ITensor; - -/** Basic function to execute min and max location. This function calls the following Neon kernels: - * - * -# NEMinMaxKernel - * -# NEMinMaxLocationKernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NEMinMaxLocation : public IFunction -{ -public: - /** Constructor */ - NEMinMaxLocation(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMinMaxLocation(const NEMinMaxLocation &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMinMaxLocation &operator=(const NEMinMaxLocation &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEMinMaxLocation(NEMinMaxLocation &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEMinMaxLocation &operator=(NEMinMaxLocation &&) = delete; - /** Default destructor */ - ~NEMinMaxLocation(); - /** Initialise the kernel's inputs and outputs. - * - * @param[in] input Input image. Data types supported: U8/S16/F32. - * @param[out] min Minimum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32. - * @param[out] max Maximum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32. - * @param[out] min_loc (Optional) Array of minimum value locations. - * @param[out] max_loc (Optional) Array of maximum value locations. - * @param[out] min_count (Optional) Number of minimum value encounters. - * @param[out] max_count (Optional) Number of maximum value encounters. - */ - void configure(const IImage *input, void *min, void *max, - ICoordinates2DArray *min_loc = nullptr, ICoordinates2DArray *max_loc = nullptr, - uint32_t *min_count = nullptr, uint32_t *max_count = nullptr); - - // Inherited methods overridden: - void run() override; - -private: - std::unique_ptr _min_max; /**< Kernel that performs min/max */ - std::unique_ptr _min_max_loc; /**< Kernel that extracts min/max locations */ -}; -} -#endif /*ARM_COMPUTE_NEMINMAXLOCATION_H */ diff --git a/arm_compute/runtime/NEON/functions/NENonLinearFilter.h b/arm_compute/runtime/NEON/functions/NENonLinearFilter.h deleted file mode 100644 index 1cd523c4dd..0000000000 --- a/arm_compute/runtime/NEON/functions/NENonLinearFilter.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2016-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NENONLINEARFILTER_H -#define ARM_COMPUTE_NENONLINEARFILTER_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" - -#include - -namespace arm_compute -{ -class ITensor; - -/** Basic function to execute non linear filter. This function calls the following Neon kernels: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NENonLinearFilterKernel - * - * @note Supported mask dimensions squares of sizes 3, 5 - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NENonLinearFilter : public INESimpleFunction -{ -public: - /** Initialize the function's source, destination, conv and border_mode. - * - * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[out] output Destination tensor. Data type supported: U8 - * @param[in] function Non linear function to perform - * @param[in] mask_size Mask size. Supported sizes: 3, 5 - * @param[in] pattern Mask pattern - * @param[in] mask The given mask. Will be used only if pattern is specified to PATTERN_OTHER - * @param[in] border_mode Strategy to use for borders. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - */ - void configure(ITensor *input, ITensor *output, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, BorderMode border_mode, - uint8_t constant_border_value = 0); -}; -} -#endif /*ARM_COMPUTE_NENONLINEARFILTER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEOpticalFlow.h b/arm_compute/runtime/NEON/functions/NEOpticalFlow.h deleted file mode 100644 index 34f466b81c..0000000000 --- a/arm_compute/runtime/NEON/functions/NEOpticalFlow.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) 2016-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEOPTICALFLOW_H -#define ARM_COMPUTE_NEOPTICALFLOW_H - -#include "arm_compute/core/IArray.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/Array.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NEScharr3x3.h" -#include "arm_compute/runtime/Tensor.h" - -#include -#include -#include - -namespace arm_compute -{ -class Pyramid; -class NELKTrackerKernel; - -/** Array of LK Internel Keypoints */ -using LKInternalKeypointArray = Array; -/** Basic function to execute optical flow. This function calls the following Neon kernels and functions: - * - * -# @ref NEScharr3x3 - * -# @ref NELKTrackerKernel - * - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NEOpticalFlow : public IFunction -{ -public: - /** Constructor - * - * @param[in] memory_manager (Optional) Memory manager. - */ - NEOpticalFlow(std::shared_ptr memory_manager = nullptr); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEOpticalFlow(const NEOpticalFlow &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEOpticalFlow &operator=(const NEOpticalFlow &) = delete; - /** Default destructor */ - ~NEOpticalFlow(); - /** Initialise the function input and output - * - * @param[in] old_pyramid Pointer to the pyramid for the old tensor. Data type supported U8 - * @param[in] new_pyramid Pointer to the pyramid for the new tensor. Data type supported U8 - * @param[in] old_points Pointer to the IKeyPointArray storing old key points - * @param[in] new_points_estimates Pointer to the IKeyPointArray storing new estimates key points - * @param[out] new_points Pointer to the IKeyPointArray storing new key points - * @param[in] termination The criteria to terminate the search of each keypoint. - * @param[in] epsilon The error for terminating the algorithm - * @param[in] num_iterations The maximum number of iterations before terminate the alogrithm - * @param[in] window_dimension The size of the window on which to perform the algorithm - * @param[in] use_initial_estimate The flag to indicate whether the initial estimated position should be used - * @param[in] border_mode The border mode applied at scharr kernel stage - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT - * - */ - void configure(const Pyramid *old_pyramid, const Pyramid *new_pyramid, const IKeyPointArray *old_points, const IKeyPointArray *new_points_estimates, - IKeyPointArray *new_points, Termination termination, float epsilon, unsigned int num_iterations, size_t window_dimension, - bool use_initial_estimate, BorderMode border_mode, uint8_t constant_border_value = 0); - - // Inherited methods overridden: - void run() override; - -private: - MemoryGroup _memory_group; - std::vector _func_scharr; - std::vector> _kernel_tracker; - std::vector _scharr_gx; - std::vector _scharr_gy; - IKeyPointArray *_new_points; - const IKeyPointArray *_new_points_estimates; - const IKeyPointArray *_old_points; - LKInternalKeypointArray _new_points_internal; - LKInternalKeypointArray _old_points_internal; - unsigned int _num_levels; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEOPTICALFLOW_H */ diff --git a/arm_compute/runtime/NEON/functions/NEPhase.h b/arm_compute/runtime/NEON/functions/NEPhase.h deleted file mode 100644 index 1202f1878d..0000000000 --- a/arm_compute/runtime/NEON/functions/NEPhase.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEPHASE_H -#define ARM_COMPUTE_NEPHASE_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -namespace arm_compute -{ -class ITensor; -class ITensorInfo; - -/** Basic function to run @ref NEMagnitudePhaseKernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * -*/ -class NEPhase : public INESimpleFunctionNoBorder -{ -public: - /** Initialise the kernel's inputs, output. - * - * @param[in] input1 First tensor input. Data type supported: S16. - * @param[in] input2 Second tensor input. Data type supported: S16. - * @param[out] output Output tensor. Data type supported: U8. - * @param[in] phase_type (Optional) Phase calculation type. Default: SIGNED. - */ - void configure(const ITensor *input1, const ITensor *input2, ITensor *output, PhaseType phase_type = PhaseType::SIGNED); -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEPHASE_H */ diff --git a/arm_compute/runtime/NEON/functions/NERemap.h b/arm_compute/runtime/NEON/functions/NERemap.h deleted file mode 100644 index 84d0f2ee92..0000000000 --- a/arm_compute/runtime/NEON/functions/NERemap.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2016-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEREMAP_H -#define ARM_COMPUTE_NEREMAP_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" -#include "arm_compute/runtime/Tensor.h" - -#include - -namespace arm_compute -{ -class ITensor; - -/** Basic function to execute remap. This function calls the following Neon kernels: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NERemapKernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NERemap : public INESimpleFunction -{ -public: - /** Initialise the function's sources, destination, interpolation policy and border mode. - * - * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[in] map_x Map for X coordinates. Data type supported: F32. - * @param[in] map_y Map for Y coordinates. Data type supported: F32. - * @param[out] output Output tensor. Data type supported: U8. - * @param[in] policy Interpolation policy to use. Only NEAREST and BILINEAR are supported. - * @param[in] border_mode Border mode to use on the input tensor. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - * - */ - void configure(ITensor *input, const ITensor *map_x, const ITensor *map_y, ITensor *output, - InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0); -}; -} -#endif /*ARM_COMPUTE_NEREMAP_H */ diff --git a/arm_compute/runtime/NEON/functions/NEScharr3x3.h b/arm_compute/runtime/NEON/functions/NEScharr3x3.h deleted file mode 100644 index e5ed1c7c5a..0000000000 --- a/arm_compute/runtime/NEON/functions/NEScharr3x3.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2016-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NESCHARR3x3_H -#define ARM_COMPUTE_NESCHARR3x3_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" - -#include - -namespace arm_compute -{ -class ITensor; - -/** Basic function to execute scharr 3x3 filter. This function calls the following Neon kernels: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NEScharr3x3Kernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NEScharr3x3 : public INESimpleFunction -{ -public: - /** Initialise the function's source, destinations and border mode. - * - * @note At least one of output_x or output_y must be not NULL. - * - * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[out] output_x (optional) Destination for the Scharr 3x3 convolution along the X axis. Data type supported: S16. - * @param[out] output_y (optional) Destination for the Scharr 3x3 convolution along the Y axis. Data type supported: S16. - * @param[in] border_mode Border mode to use for the convolution. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - * - */ - void configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0); -}; -} -#endif /*ARM_COMPUTE_NESCHARR3x3_H */ diff --git a/arm_compute/runtime/NEON/functions/NESobel3x3.h b/arm_compute/runtime/NEON/functions/NESobel3x3.h deleted file mode 100644 index bc5f5e4545..0000000000 --- a/arm_compute/runtime/NEON/functions/NESobel3x3.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2016-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NESOBEL3x3_H -#define ARM_COMPUTE_NESOBEL3x3_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" - -#include - -namespace arm_compute -{ -class ITensor; - -/** Basic function to execute sobel 3x3 filter. This function calls the following Neon kernels: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NESobel3x3Kernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NESobel3x3 : public INESimpleFunction -{ -public: - /** Initialise the function's source, destinations and border mode. - * - * @note At least one of output_x or output_y must be not NULL. - * - * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[out] output_x (optional) Destination for the Sobel 3x3 convolution along the X axis. Data type supported: S16. - * @param[out] output_y (optional) Destination for the Sobel 3x3 convolution along the Y axis. Data type supported: S16. - * @param[in] border_mode Border mode to use for the convolution. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - * - */ - void configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0); -}; -} -#endif /*ARM_COMPUTE_NESOBEL3x3_H */ diff --git a/arm_compute/runtime/NEON/functions/NESobel5x5.h b/arm_compute/runtime/NEON/functions/NESobel5x5.h deleted file mode 100644 index e64558c451..0000000000 --- a/arm_compute/runtime/NEON/functions/NESobel5x5.h +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) 2016-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NESOBEL5x5_H -#define ARM_COMPUTE_NESOBEL5x5_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/Tensor.h" - -#include -#include - -namespace arm_compute -{ -class ITensor; -class NESobel5x5HorKernel; -class NESobel5x5VertKernel; -class NEFillBorderKernel; - -/** Basic function to execute sobel 5x5 filter. This function calls the following Neon kernels: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NESobel5x5HorKernel - * -# @ref NESobel5x5VertKernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NESobel5x5 : public IFunction -{ -public: - /** Default constructor */ - NESobel5x5(std::shared_ptr memory_manager = nullptr); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESobel5x5(const NESobel5x5 &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESobel5x5 &operator=(const NESobel5x5 &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NESobel5x5(NESobel5x5 &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NESobel5x5 &operator=(NESobel5x5 &&) = delete; - /** Default destructor */ - ~NESobel5x5(); - /** Initialise the function's source, destinations and border mode. - * - * @note At least one of output_x or output_y must be not NULL. - * - * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[out] output_x (optional) Destination for the Sobel 5x5 convolution along the X axis. Data type supported: S16. - * @param[out] output_y (optional) Destination for the Sobel 5x5 convolution along the Y axis. Data type supported: S16. - * @param[in] border_mode Border mode to use for the convolution. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - * - */ - void configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0); - - // Inherited methods overridden: - void run() override; - -protected: - MemoryGroup _memory_group; /**< Function memory group */ - std::unique_ptr _sobel_hor; /**< Sobel Horizontal 5x5 kernel */ - std::unique_ptr _sobel_vert; /**< Sobel Vertical 5x5 kernel */ - Tensor _tmp_x; /**< Temporary buffer for Sobel X */ - Tensor _tmp_y; /**< Temporary buffer for Sobel Y */ - std::unique_ptr _border_handler; /**< Kernel to handle tensor borders */ -}; -} -#endif /*ARM_COMPUTE_NESOBEL5x5_H */ diff --git a/arm_compute/runtime/NEON/functions/NESobel7x7.h b/arm_compute/runtime/NEON/functions/NESobel7x7.h deleted file mode 100644 index 6419d67b56..0000000000 --- a/arm_compute/runtime/NEON/functions/NESobel7x7.h +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) 2016-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NESOBEL7x7_H -#define ARM_COMPUTE_NESOBEL7x7_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/Tensor.h" - -#include -#include - -namespace arm_compute -{ -class ITensor; -class NESobel7x7HorKernel; -class NESobel7x7VertKernel; -class NEFillBorderKernel; - -/** Basic function to execute sobel 7x7 filter. This function calls the following Neon kernels: - * - * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref NESobel7x7HorKernel - * -# @ref NESobel7x7VertKernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * - */ -class NESobel7x7 : public IFunction -{ -public: - /** Default constructor */ - NESobel7x7(std::shared_ptr memory_manager = nullptr); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESobel7x7(const NESobel7x7 &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESobel7x7 &operator=(const NESobel7x7 &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NESobel7x7(NESobel7x7 &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NESobel7x7 &operator=(NESobel7x7 &&) = delete; - /** Default destructor */ - ~NESobel7x7(); - /** Initialise the function's source, destinations and border mode. - * - * @note At least one of output_x or output_y must be not NULL. - * - * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[out] output_x (optional) Destination for the Sobel 7x7 convolution along the X axis. Data type supported: S32. - * @param[out] output_y (optional) Destination for the Sobel 7x7 convolution along the Y axis. Data type supported: S32. - * @param[in] border_mode Border mode to use for the convolution. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - * - */ - void configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0); - - // Inherited methods overridden: - void run() override; - -protected: - MemoryGroup _memory_group; /**< Function memory group */ - std::unique_ptr _sobel_hor; /**< Sobel Horizontal 7x7 kernel */ - std::unique_ptr _sobel_vert; /**< Sobel Vertical 7x7 kernel */ - Tensor _tmp_x; /**< Temporary buffer for Sobel X */ - Tensor _tmp_y; /**< Temporary buffer for Sobel Y */ - std::unique_ptr _border_handler; /**< Kernel to handle tensor borders */ -}; -} -#endif /*ARM_COMPUTE_NESOBEL7x7_H */ diff --git a/arm_compute/runtime/NEON/functions/NETableLookup.h b/arm_compute/runtime/NEON/functions/NETableLookup.h deleted file mode 100644 index 03674cd297..0000000000 --- a/arm_compute/runtime/NEON/functions/NETableLookup.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NETABLELOOKUP_H -#define ARM_COMPUTE_NETABLELOOKUP_H - -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -namespace arm_compute -{ -class ITensor; -class ILut; - -/** Basic function to run @ref NETableLookupKernel */ -class NETableLookup : public INESimpleFunctionNoBorder -{ -public: - /** Initialise the kernel's inputs and output - * - * @param[in] input First tensor input. Data types supported: U8/S16 - * @param[in] lut Input lookup table. - * @param[out] output Output tensor. Data types supported: same as @p input - */ - void configure(const ITensor *input, const ILut *lut, ITensor *output); -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NETABLELOOKUP_H */ diff --git a/arm_compute/runtime/NEON/functions/NEThreshold.h b/arm_compute/runtime/NEON/functions/NEThreshold.h deleted file mode 100644 index 443571f5a0..0000000000 --- a/arm_compute/runtime/NEON/functions/NEThreshold.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) 2016-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NETHRESHOLD_H -#define ARM_COMPUTE_NETHRESHOLD_H - -#include "arm_compute/core/KernelDescriptors.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" - -#include - -namespace arm_compute -{ -// Forward declarations -class ITensor; -class ITensorInfo; - -/** Basic function to run @ref NEThresholdKernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * -*/ -class NEThreshold : public INESimpleFunctionNoBorder -{ -public: - /** Initialise the function's source, destination, thresholds and threshold type - * - * @param[in] input First tensor input. Data type supported: U8. - * @param[out] output Output tensor. Data type supported: U8. - * @param[in] info Threshold descriptor - */ - void configure(const ITensor *input, ITensor *output, const ThresholdKernelInfo &info); - /** Static function to check if given info will lead to a valid configuration of @ref NEThreshold - * - * @param[in] input First tensor input. Data type supported: U8. - * @param[in] output Output tensor. Data type supported: U8. - * @param[in] info Threshold descriptor. - * - * @return A status, containing an error code in case of failure - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ThresholdKernelInfo &info); -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NETHRESHOLD_H */ diff --git a/arm_compute/runtime/NEON/functions/NEWarpAffine.h b/arm_compute/runtime/NEON/functions/NEWarpAffine.h deleted file mode 100644 index c83a3cefcf..0000000000 --- a/arm_compute/runtime/NEON/functions/NEWarpAffine.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2016-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEWARPAFFINE_H -#define ARM_COMPUTE_NEWARPAFFINE_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" - -#include - -namespace arm_compute -{ -class ITensor; - -/** Basic function to run @ref NEWarpAffineKernel - * - * @deprecated This function is deprecated and will be removed in release 21.05 - * -*/ -class NEWarpAffine : public INESimpleFunction -{ -public: - /** Initialize the function's source, destination, interpolation policy and border_mode. - * - * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[out] output Destination tensor. Data type supported: U8 - * @param[in] matrix The perspective matrix. Must be 2x3 of type float. - * The matrix argument requires 9 values, the last 3 values are ignored. - * @param[in] policy The interpolation type. - * @param[in] border_mode Strategy to use for borders. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - */ - void configure(ITensor *input, ITensor *output, const std::array &matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0); -}; -} -#endif /*ARM_COMPUTE_NEWARPAFFINE_H */ diff --git a/arm_compute/runtime/NEON/functions/NEWarpPerspective.h b/arm_compute/runtime/NEON/functions/NEWarpPerspective.h deleted file mode 100644 index 31a1477dca..0000000000 --- a/arm_compute/runtime/NEON/functions/NEWarpPerspective.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEWARPPERSPECTIVE_H -#define ARM_COMPUTE_NEWARPPERSPECTIVE_H - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" - -#include - -namespace arm_compute -{ -class ITensor; - -/** Basic function to run @ref NEWarpPerspectiveKernel - * - * @deprecated This function is deprecated and is intended to be removed in 21.05 release - * -*/ -class NEWarpPerspective : public INESimpleFunction -{ -public: - /** Initialize the function's source, destination, interpolation policy and border_mode. - * - * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) - * @param[out] output Destination tensor. Data type supported: U8 - * @param[in] matrix The perspective matrix. Must be 3x3 of type float. - * @param[in] policy The interpolation type. - * @param[in] border_mode Strategy to use for borders. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - */ - void configure(ITensor *input, ITensor *output, const std::array &matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0); -}; -} -#endif /*ARM_COMPUTE_NEWARPPERSPECTIVE_H */ diff --git a/docs/00_introduction.dox b/docs/00_introduction.dox index 5c6b202139..3b340ebe5e 100644 --- a/docs/00_introduction.dox +++ b/docs/00_introduction.dox @@ -86,6 +86,50 @@ If there is more than one release in a month then an extra sequential number is @subsection S2_2_changelog Changelog +v21.05 Public major release + - Removed computer vision support from Neon backend + - Removed the following functions: + - NEAbsoluteDifference + - NEAccumulate + - NEBox3x3 + - NECannyEdge + - NEChannelCombine + - NEChannelExtract + - NEColorConvert + - NEDerivative + - NEDilate + - NEEqualizeHistogram + - NEErode + - NEFastCorners + - NEGaussian3x3 + - NEGaussian5x5 + - NEGaussianPyramid + - NEHOGDescriptor + - NEHOGDetector + - NEHOGGradient + - NEHOGMultiDetection + - NEHarrisCorners + - NEHistogram + - NEIntegralImage + - NELaplacianPyramid + - NELaplacianReconstruct + - NEMagnitude + - NEMeanStdDev + - NEMedian3x3 + - NEMinMaxLocation + - NENonLinearFilter + - NEOpticalFlow + - NEPhase + - NERemap + - NEScharr3x3 + - NESobel3x3 + - NESobel5x5 + - NESobel7x7 + - NETableLookup + - NEThreshold + - NEWarpAffine + - NEWarpPerspective + v21.02 Public major release - Various bug fixes. - Various optimisations. @@ -169,7 +213,7 @@ v20.11 Public major release - Removed padding from Neon kernels: - @ref NEComplexPixelWiseMultiplicationKernel - @ref NENonMaximaSuppression3x3Kernel - - @ref NERemapKernel + - NERemapKernel - @ref NEGEMMInterleave4x4Kernel - @ref NEDirectConvolutionLayerKernel - @ref NEScaleKernel @@ -462,7 +506,7 @@ v20.08 Public major release - Removed padding from: - @ref NEPixelWiseMultiplicationKernel - NEHeightConcatenateLayerKernel - - @ref NEThresholdKernel + - NEThresholdKernel - NEBatchConcatenateLayerKernel - @ref NETransposeKernel - @ref NEBatchNormalizationLayerKernel @@ -490,7 +534,7 @@ v20.08 Public major release - NEGEMMLowpQuantizeDownInt32ToUint8Scale - NEGEMMMatrixAccumulateBiasesKernel - Deprecated functions / interfaces: - - Non-descriptor based interfaces for @ref NEThreshold, @ref CLThreshold + - Non-descriptor based interfaces for NEThreshold, @ref CLThreshold - Non-descriptor based interfaces for @ref NEScale, @ref CLScale and @ref GCScale - In @ref NESoftmaxLayer, @ref NELogSoftmaxLayer, @ref CLSoftmaxLayer, @ref CLLogSoftmaxLayer and @ref GCSoftmaxLayer : The default "axis" value for @ref CLSoftmaxLayer, @ref CLLogSoftmaxLayer and @ref GCSoftmaxLayer is changed from 1 to 0. @@ -1270,13 +1314,13 @@ v17.04 Public bug fixes release - @ref CLGaussianPyramidHorKernel - @ref CLGaussianPyramidVertKernel - @ref CLGradientKernel - - @ref NEChannelCombineKernel + - NEChannelCombineKernel - @ref NEFillArrayKernel - - @ref NEGaussianPyramidHorKernel - - @ref NEGaussianPyramidVertKernel + - NEGaussianPyramidHorKernel + - NEGaussianPyramidVertKernel - NEHarrisScoreFP16Kernel - - @ref NEHarrisScoreKernel - - @ref NEHOGDetectorKernel + - NEHarrisScoreKernel + - NEHOGDetectorKernel - NELogits1DMaxKernel - NELogits1DShiftExpSumKernel - NELogits1DNormKernel @@ -1321,8 +1365,8 @@ v17.02.1 Sources preview - @ref CLMinMaxKernel, @ref CLMinMaxLocationKernel / @ref CLMinMaxLocation - @ref CLNonLinearFilterKernel / @ref CLNonLinearFilter - New Neon FP16 kernels (Requires armv8.2 CPU) - - @ref NEAccumulateWeightedFP16Kernel - - @ref NEBox3x3FP16Kernel + - NEAccumulateWeightedFP16Kernel + - NEBox3x3FP16Kernel - @ref NENonMaximaSuppression3x3FP16Kernel v17.02 Sources preview @@ -1333,8 +1377,8 @@ v17.02 Sources preview - @ref CLFastCornersKernel / @ref CLFastCorners - @ref CLMeanStdDevKernel / @ref CLMeanStdDev - New Neon kernels / functions: - - HOG / SVM: @ref NEHOGOrientationBinningKernel, @ref NEHOGBlockNormalizationKernel, @ref NEHOGDetectorKernel, NEHOGNonMaximaSuppressionKernel / @ref NEHOGDescriptor, @ref NEHOGDetector, @ref NEHOGGradient, @ref NEHOGMultiDetection - - @ref NENonLinearFilterKernel / @ref NENonLinearFilter + - HOG / SVM: NEHOGOrientationBinningKernel, NEHOGBlockNormalizationKernel, NEHOGDetectorKernel, NEHOGNonMaximaSuppressionKernel / NEHOGDescriptor, NEHOGDetector, NEHOGGradient, NEHOGMultiDetection + - NENonLinearFilterKernel / NENonLinearFilter - Introduced a CLScheduler to manage the default context and command queue used by the runtime library and create synchronisation events. - Switched all the kernels / functions to use tensors instead of images. - Updated documentation to include instructions to build the library from sources. diff --git a/docs/01_library.dox b/docs/01_library.dox index 848b060e9f..641fc3e11b 100644 --- a/docs/01_library.dox +++ b/docs/01_library.dox @@ -185,13 +185,13 @@ This is a very basic implementation which was originally used in the Neon runtim @sa CPPScheduler -@note Some kernels like for example @ref NEHistogramKernel need some local temporary buffer to perform their calculations. In order to avoid memory corruption between threads, the local buffer must be of size: ```memory_needed_per_thread * num_threads``` and a unique thread_id between 0 and num_threads must be assigned to the @ref ThreadInfo object passed to the ```run``` function. +@note Some kernels need some local temporary buffer to perform their calculations. In order to avoid memory corruption between threads, the local buffer must be of size: ```memory_needed_per_thread * num_threads``` and a unique thread_id between 0 and num_threads must be assigned to the @ref ThreadInfo object passed to the ```run``` function. @subsection S4_2_4 Functions Functions will automatically allocate the temporary buffers mentioned above, and will automatically multi-thread kernels' executions using the very basic scheduler described in the previous section. -Simple functions only call a single kernel (e.g @ref NEConvolution3x3), while more complex ones consist of several kernels pipelined together (e.g @ref NEGaussianPyramid, @ref NEHarrisCorners). Check their documentation to find out which kernels are used by each function. +Simple functions only call a single kernel (e.g @ref NEConvolution3x3), while more complex ones consist of several kernels pipelined together (e.g @ref NEFullyConnectedLayer ). Check their documentation to find out which kernels are used by each function. @code{.cpp} //Create a function object: @@ -230,11 +230,7 @@ For example: @subsection S4_4_2_cl_neon OpenCL / Neon interoperability -You can mix OpenCL and Neon kernels and functions. However it is the user's responsibility to handle the mapping/unmapping of OpenCL objects, for example: - -@snippet neoncl_scale_median_gaussian.cpp Neon / OpenCL Interop - -@sa main_neoncl_scale_median_gaussian +You can mix OpenCL and Neon kernels and functions. However it is the user's responsibility to handle the mapping/unmapping of OpenCL objects. @section S4_5_algorithms Algorithms diff --git a/docs/06_functions_list.dox b/docs/06_functions_list.dox index 96dce94a89..19485a8ac1 100644 --- a/docs/06_functions_list.dox +++ b/docs/06_functions_list.dox @@ -33,58 +33,39 @@ namespace arm_compute - @ref IFunction - @ref INESimpleFunction - - @ref NEAbsoluteDifference - @ref NEArithmeticAddition - @ref NEArithmeticSubtraction - @ref NEBoundingBoxTransform - - @ref NEBox3x3 - @ref NECast - @ref NEComplexPixelWiseMultiplication - @ref NEConvolution3x3 - @ref NEConvolutionRectangle - - @ref NEDilate - @ref NEElementwiseComparison - @ref NEElementwiseComparisonStatic - @ref NEElementwiseDivision - @ref NEElementwiseMax - @ref NEElementwiseMin - @ref NEElementwiseSquaredDiff - - @ref NEErode - @ref NEExpLayer - - @ref NEGaussian3x3 - - @ref NEIntegralImage - @ref NELogicalAnd - @ref NELogicalNot - @ref NELogicalOr - - @ref NEMedian3x3 - - @ref NENonLinearFilter - @ref NENonMaximaSuppression3x3 - @ref NEPixelWiseMultiplication - @ref NEPReluLayer - - @ref NERemap - @ref NEROIAlignLayer - @ref NERoundLayer - @ref NERsqrtLayer - - @ref NEScharr3x3 - @ref NESelect - - @ref NESobel3x3 - @ref NEStridedSlice - - @ref NEWarpAffine - - @ref NEWarpPerspective - @ref INESimpleFunctionNoBorder - - @ref NEAccumulate - - @ref NEAccumulateSquared - - @ref NEAccumulateWeighted - @ref NEActivationLayer - @ref NEBatchToSpaceLayer - @ref NEBitwiseAnd - @ref NEBitwiseNot - @ref NEBitwiseOr - @ref NEBitwiseXor - - @ref NEChannelCombine - - @ref NEChannelExtract - @ref NEChannelShuffleLayer - - @ref NEColorConvert - @ref NECopy - @ref NEDepthConvertLayer - @ref NEFlattenLayer @@ -93,23 +74,17 @@ namespace arm_compute - @ref NEGather - @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint - @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint - - @ref NEHOGDetector - - @ref NEMagnitude - @ref NEMeanStdDevNormalizationLayer - @ref NEPermute - - @ref NEPhase - @ref NEPriorBoxLayer - @ref NEReorgLayer - @ref NEReshapeLayer - @ref NEReverse - @ref NESlice - - @ref NETableLookup - - @ref NEThreshold - @ref NETile - @ref NETranspose - @ref NEArgMinMaxLayer - @ref NEBatchNormalizationLayer - - @ref NECannyEdge - @ref NEComplexPixelWiseMultiplication - @ref NEConcatenateLayer - @ref NEConvertFullyConnectedWeights @@ -121,11 +96,8 @@ namespace arm_compute - @ref NEDepthwiseConvolutionAssemblyDispatch - @ref NEDepthwiseConvolutionLayer - @ref NEDequantizationLayer - - @ref NEDerivative - @ref NEDetectionPostProcessLayer - @ref NEDirectConvolutionLayer - - @ref NEEqualizeHistogram - - @ref NEFastCorners - @ref NEFFT1D - @ref NEFFT2D - @ref NEFFTConvolutionLayer @@ -133,33 +105,19 @@ namespace arm_compute - @ref NEFillBorder - @ref NEFullyConnectedLayer - @ref NEFuseBatchNormalization - - @ref NEGaussian5x5 - - @ref NEGaussianPyramid - - @ref NEGaussianPyramidHalf - - @ref NEGaussianPyramidOrb - @ref NEGEMM - @ref NEGEMMAssemblyDispatch - @ref NEGEMMConv2d - @ref NEGEMMConvolutionLayer - @ref NEGEMMLowpMatrixMultiplyCore - @ref NEGenerateProposalsLayer - - @ref NEHarrisCorners - - @ref NEHistogram - - @ref NEHOGDescriptor - - @ref NEHOGGradient - - @ref NEHOGMultiDetection - @ref NEInstanceNormalizationLayer - @ref NEL2NormalizeLayer - - @ref NELaplacianPyramid - - @ref NELaplacianReconstruct - @ref NELSTMLayer - @ref NELSTMLayerQuantized - @ref NEQLSTMLayer - @ref NEMaxUnpoolingLayer - - @ref NEMeanStdDev - - @ref NEMinMaxLocation - @ref NENormalizationLayer - - @ref NEOpticalFlow - @ref NEPadLayer - @ref NEPoolingLayer - @ref NEQuantizationLayer @@ -169,8 +127,6 @@ namespace arm_compute - @ref NERNNLayer - @ref NEROIPoolingLayer - @ref NEScale - - @ref NESobel5x5 - - @ref NESobel7x7 - @ref NESoftmaxLayerGeneric <IS_LOG> - @ref NESpaceToBatchLayer - @ref NESpaceToDepthLayer diff --git a/examples/neon_cartoon_effect.cpp b/examples/neon_cartoon_effect.cpp deleted file mode 100644 index 24a689bee9..0000000000 --- a/examples/neon_cartoon_effect.cpp +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/NEON/NEFunctions.h" - -#include "arm_compute/core/Types.h" -#include "utils/ImageLoader.h" -#include "utils/Utils.h" - -using namespace arm_compute; -using namespace utils; - -class NEONCartoonEffectExample : public Example -{ -public: - bool do_setup(int argc, char **argv) override - { - // Open PPM file - PPMLoader ppm; - - if(argc < 2) - { - // Print help - std::cout << "Usage: ./build/neon_cartoon_effect [input_image.ppm]\n\n"; - std::cout << "No input_image provided, creating a dummy 640x480 image\n"; - // Create an empty grayscale 640x480 image - src_img.allocator()->init(TensorInfo(640, 480, Format::U8)); - } - else - { - ppm.open(argv[1]); - ppm.init_image(src_img, Format::U8); - } - - // Initialize just the dimensions and format of the images: - gaus5x5_img.allocator()->init(*src_img.info()); - canny_edge_img.allocator()->init(*src_img.info()); - dst_img.allocator()->init(*src_img.info()); - - // Configure the functions to call - gaus5x5.configure(&src_img, &gaus5x5_img, BorderMode::REPLICATE); - canny_edge.configure(&src_img, &canny_edge_img, 100, 80, 3, 1, BorderMode::REPLICATE); - sub.configure(&gaus5x5_img, &canny_edge_img, &dst_img, ConvertPolicy::SATURATE); - - // Now that the padding requirements are known we can allocate the images: - src_img.allocator()->allocate(); - dst_img.allocator()->allocate(); - gaus5x5_img.allocator()->allocate(); - canny_edge_img.allocator()->allocate(); - - // Fill the input image with the content of the PPM image if a filename was provided: - if(ppm.is_open()) - { - ppm.fill_image(src_img); - output_filename = std::string(argv[1]) + "_out.ppm"; - } - - return true; - } - - void do_run() override - { - // Execute the functions: - gaus5x5.run(); - canny_edge.run(); - sub.run(); - } - - void do_teardown() override - { - // Save the result to file: - if(!output_filename.empty()) - { - save_to_ppm(dst_img, output_filename); // save_to_ppm maps and unmaps the image to store as PPM - } - } - -private: - Image src_img{}, dst_img{}, gaus5x5_img{}, canny_edge_img{}; - NEGaussian5x5 gaus5x5{}; - NECannyEdge canny_edge{}; - NEArithmeticSubtraction sub{}; - std::string output_filename{}; -}; - -/** Main program for cartoon effect test - * - * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Path to PPM image to process ) - */ -int main(int argc, char **argv) -{ - return utils::run_example(argc, argv); -} diff --git a/examples/neon_opticalflow.cpp b/examples/neon_opticalflow.cpp deleted file mode 100644 index ff9478cc18..0000000000 --- a/examples/neon_opticalflow.cpp +++ /dev/null @@ -1,263 +0,0 @@ -/* - * Copyright (c) 2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/NEFunctions.h" - -#include "arm_compute/core/Types.h" -#include "utils/ImageLoader.h" -#include "utils/Utils.h" - -#include -#include -#include - -using namespace arm_compute; -using namespace utils; - -class NeonOpticalFlowExample : public Example -{ -public: - NeonOpticalFlowExample() - : input_points(100), output_points(100), point_estimates(100) - { - } - - bool do_setup(int argc, char **argv) override - { - if(argc < 5) - { - // Print help - std::cout << "Usage: ./build/neon_opticalflow [src_1st.ppm] [src_2nd.ppm] [keypoints] [estimates]\n\n"; - const unsigned int img_width = 64; - const unsigned int img_height = 64; - const unsigned int rect_x = 20; - const unsigned int rect_y = 40; - const unsigned int rect_s = 8; - const unsigned int offsetx = 24; - const unsigned int offsety = 3; - std::cout << "No input_image provided, creating test data:\n"; - std::cout << "\t Image src_1st = (" << img_width << "," << img_height << ")" << std::endl; - std::cout << "\t Image src_2nd = (" << img_width << "," << img_height << ")" << std::endl; - init_img(src_1st, img_width, img_height, rect_x, rect_y, rect_s); - init_img(src_2nd, img_width, img_height, rect_x + offsetx, rect_y + offsety, rect_s); - const int num_points = 4; - input_points.resize(num_points); - point_estimates.resize(num_points); - const std::array tracking_coordsx = { rect_x - 1, rect_x, rect_x + 1, rect_x + 2 }; - const std::array tracking_coordsy = { rect_y - 1, rect_y, rect_y + 1, rect_y + 2 }; - const std::array estimate_coordsx = { rect_x + offsetx - 1, rect_x + offsetx, rect_x + offsetx + 1, rect_x + offsetx + 2 }; - const std::array estimate_coordsy = { rect_y + offsety - 1, rect_y + offsety, rect_y + offsety + 1, rect_y + offsety + 2 }; - - for(int k = 0; k < num_points; ++k) - { - auto &keypoint = input_points.at(k); - keypoint.x = tracking_coordsx[k]; - keypoint.y = tracking_coordsy[k]; - keypoint.tracking_status = 1; - } - for(int k = 0; k < num_points; ++k) - { - auto &keypoint = point_estimates.at(k); - keypoint.x = estimate_coordsx[k]; - keypoint.y = estimate_coordsy[k]; - keypoint.tracking_status = 1; - } - } - else - { - load_ppm(argv[1], src_1st); - load_ppm(argv[2], src_2nd); - load_keypoints(argv[3], input_points); - load_keypoints(argv[4], point_estimates); - } - - print_points(input_points, "Tracking points : "); - print_points(point_estimates, "Estimates points : "); - - const unsigned int num_levels = 3; - // Initialise and allocate pyramids - PyramidInfo pyramid_info(num_levels, SCALE_PYRAMID_HALF, src_1st.info()->tensor_shape(), src_1st.info()->format()); - pyr_1st.init_auto_padding(pyramid_info); - pyr_2nd.init_auto_padding(pyramid_info); - - pyrf_1st.configure(&src_1st, &pyr_1st, BorderMode::UNDEFINED, 0); - pyrf_2nd.configure(&src_2nd, &pyr_2nd, BorderMode::UNDEFINED, 0); - - output_points.resize(input_points.num_values()); - - optkf.configure(&pyr_1st, &pyr_2nd, - &input_points, &point_estimates, &output_points, - Termination::TERM_CRITERIA_BOTH, 0.01f, 15, 5, true, BorderMode::UNDEFINED, 0); - - pyr_1st.allocate(); - pyr_2nd.allocate(); - - return true; - } - void do_run() override - { - //Execute the functions: - pyrf_1st.run(); - pyrf_2nd.run(); - optkf.run(); - } - void do_teardown() override - { - print_points(output_points, "Output points : "); - } - -private: - /** Loads the input keypoints from a file into an array - * - * @param[in] fn Filename containing the keypoints. Each line must have two values X Y. - * @param[out] img Reference to an unintialised KeyPointArray - */ - bool load_keypoints(const std::string &fn, KeyPointArray &array) - { - assert(!fn.empty()); - std::ifstream f(fn); - if(f.is_open()) - { - std::cout << "Reading points from " << fn << std::endl; - std::vector v; - for(std::string line; std::getline(f, line);) - { - std::stringstream ss(line); - std::string xcoord; - std::string ycoord; - getline(ss, xcoord, ' '); - getline(ss, ycoord, ' '); - KeyPoint kp; - kp.x = std::stoi(xcoord); - kp.y = std::stoi(ycoord); - kp.tracking_status = 1; - v.push_back(kp); - } - const int num_points = v.size(); - array.resize(num_points); - for(int k = 0; k < num_points; ++k) - { - auto &keypoint = array.at(k); - keypoint = v[k]; - } - return true; - } - else - { - std::cout << "Cannot open keypoints file " << fn << std::endl; - return false; - } - } - - /** Creates and Image and fills it with the ppm data from the file - * - * @param[in] fn PPM filename to be loaded - * @param[out] img Reference to an unintialised image instance - */ - bool load_ppm(const std::string &fn, Image &img) - { - assert(!fn.empty()); - PPMLoader ppm; - ppm.open(fn); - ppm.init_image(img, Format::U8); - img.allocator()->allocate(); - if(ppm.is_open()) - { - std::cout << "Reading image " << fn << std::endl; - ppm.fill_image(img); - return true; - } - else - { - std::cout << "Cannot open " << fn << std::endl; - return false; - } - } - /** Creates and Image and draws a square in the specified coordinares. - * - * @param[out] img Reference to an unintialised image instance - * @param[in] img_width Width of the image to be created - * @param[in] img_height Height of the image to be created - * @param[in] square_center_x Coordinate along x-axis to be used as the center for the square - * @param[in] square_center_y Coordinate along y-axis to be used as the center for the square - * @param[in] square_size Size in pixels to be used for the square - */ - void init_img(Image &img, unsigned int img_width, unsigned int img_height, - unsigned int square_center_x, unsigned int square_center_y, - unsigned int square_size) - { - img.allocator()->init(TensorInfo(img_width, img_height, Format::U8)); - img.allocator()->allocate(); - const unsigned int square_half = square_size / 2; - // assert the square is in the bounds of the image - assert(square_center_x > square_half && square_center_x + square_half < img_width); - assert(square_center_y > square_half && square_center_y + square_half < img_height); - // get ptr to the top left pixel for the squeare - std::fill(img.buffer(), img.buffer() + img_width * img_height, 0); - for(unsigned int i = 0; i < square_size; ++i) - { - for(unsigned int j = 0; j < square_size; ++j) - { - uint8_t *ptr = img.ptr_to_element(Coordinates(square_center_x - square_half + j, square_center_y - square_half + i)); - *ptr = 0xFF; - } - } - } - /** Prints an array of keypoints and an optional label - * - * @param[in] a Keypoint array to be printed - * @param[in] str Label to be printed before the array - */ - void print_points(const KeyPointArray &a, const std::string &str = "") - { - std::cout << str << std::endl; - for(unsigned int k = 0; k < a.num_values(); ++k) - { - auto kp = a.at(k); - std::cout << "\t " - << " (x,y) = (" << kp.x << "," << kp.y << ")"; - std::cout << " strength = " << kp.strength << " " - << " scale = " << kp.scale << " orientation " << kp.orientation << " status " << kp.tracking_status << " err = " << kp.error << std::endl; - } - } - - Pyramid pyr_1st{}; - Pyramid pyr_2nd{}; - NEGaussianPyramidHalf pyrf_1st{}; - NEGaussianPyramidHalf pyrf_2nd{}; - NEOpticalFlow optkf{}; - Image src_1st{}, src_2nd{}; - KeyPointArray input_points; - KeyPointArray output_points; - KeyPointArray point_estimates; -}; - -/** Main program for optical flow test - * - * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Path to PPM image to process ) - */ -int main(int argc, char **argv) -{ - return utils::run_example(argc, argv); -} diff --git a/examples/neoncl_scale_median_gaussian.cpp b/examples/neoncl_scale_median_gaussian.cpp deleted file mode 100644 index 2580a35f24..0000000000 --- a/examples/neoncl_scale_median_gaussian.cpp +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Copyright (c) 2016-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL /* Needed by Utils.cpp to handle OpenCL exceptions properly */ -#error "This example needs to be built with -DARM_COMPUTE_CL" -#endif /* ARM_COMPUTE_CL */ - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "arm_compute/runtime/CL/functions/CLGaussian5x5.h" -#include "arm_compute/runtime/CL/functions/CLScale.h" -#include "arm_compute/runtime/NEON/NEFunctions.h" -#include "utils/ImageLoader.h" -#include "utils/Utils.h" - -using namespace arm_compute; -using namespace utils; - -/** Example demonstrating how to use both CL and Neon functions in the same pipeline - * - * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Path to PPM image to process ) - */ -class NEONCLScaleMedianGaussianExample : public Example -{ -public: - bool do_setup(int argc, char **argv) override - { - /** [Neon / OpenCL Interop] */ - PPMLoader ppm; - - CLScheduler::get().default_init(); - - if(argc < 2) - { - // Print help - std::cout << "Usage: ./build/cl_convolution [input_image.ppm]\n\n"; - std::cout << "No input_image provided, creating a dummy 640x480 image\n"; - // Create an empty grayscale 640x480 image - src.allocator()->init(TensorInfo(640, 480, Format::U8)); - } - else - { - ppm.open(argv[1]); - ppm.init_image(src, Format::U8); - } - - TensorInfo scale_median_info(TensorInfo(src.info()->dimension(0) / 2, src.info()->dimension(1) / 2, Format::U8)); - - // Configure the temporary and destination images - scale_median.allocator()->init(scale_median_info); - median_gauss.allocator()->init(scale_median_info); - dst.allocator()->init(scale_median_info); - - scale.configure(&src, &scale_median, ScaleKernelInfo{ InterpolationPolicy::NEAREST_NEIGHBOR, BorderMode::REPLICATE }); - median.configure(&scale_median, &median_gauss, BorderMode::REPLICATE); - gauss.configure(&median_gauss, &dst, BorderMode::REPLICATE); - - // Allocate all the images - src.allocator()->allocate(); - scale_median.allocator()->allocate(); - median_gauss.allocator()->allocate(); - dst.allocator()->allocate(); - - // Fill the input image with the content of the PPM image if a filename was provided: - if(ppm.is_open()) - { - ppm.fill_image(src); - const std::string output_filename = std::string(argv[1]) + "_out.ppm"; - } - /** [Neon / OpenCL Interop] */ - - return true; - } - void do_run() override - { - // Enqueue and flush the OpenCL kernel: - scale.run(); - - // Do a blocking map of the input and output buffers of the Neon function: - scale_median.map(); - median_gauss.map(); - - // Run the Neon function: - median.run(); - - // Unmap the output buffer before it's used again by OpenCL: - scale_median.unmap(); - median_gauss.unmap(); - - // Run the final OpenCL function: - gauss.run(); - - // Make sure all the OpenCL jobs are done executing: - CLScheduler::get().sync(); - } - void do_teardown() override - { - // Save the result to file: - if(!output_filename.empty()) - { - save_to_ppm(dst, output_filename); // save_to_ppm maps and unmaps the image to store as PPM - } - } - -private: - CLImage src{}, scale_median{}, median_gauss{}, dst{}; - CLScale scale{}; - NEMedian3x3 median{}; - CLGaussian5x5 gauss{}; - std::string output_filename{}; -}; - -/** Main program for neon/cl scale median gaussian test - * - * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Path to PPM image to process ) - */ -int main(int argc, char **argv) -{ - return utils::run_example(argc, argv); -} diff --git a/src/core/NEON/NEKernels.h b/src/core/NEON/NEKernels.h index b962c9eeee..aea245c6fb 100644 --- a/src/core/NEON/NEKernels.h +++ b/src/core/NEON/NEKernels.h @@ -24,9 +24,6 @@ #ifndef ARM_COMPUTE_NEKERNELS_H #define ARM_COMPUTE_NEKERNELS_H -/* Header regrouping all the Neon kernels */ -#include "src/core/NEON/kernels/NEAbsoluteDifferenceKernel.h" -#include "src/core/NEON/kernels/NEAccumulateKernel.h" #include "src/core/NEON/kernels/NEBatchNormalizationLayerKernel.h" #include "src/core/NEON/kernels/NEBatchToSpaceLayerKernel.h" #include "src/core/NEON/kernels/NEBitwiseAndKernel.h" @@ -34,13 +31,8 @@ #include "src/core/NEON/kernels/NEBitwiseOrKernel.h" #include "src/core/NEON/kernels/NEBitwiseXorKernel.h" #include "src/core/NEON/kernels/NEBoundingBoxTransformKernel.h" -#include "src/core/NEON/kernels/NEBox3x3Kernel.h" -#include "src/core/NEON/kernels/NECannyEdgeKernel.h" -#include "src/core/NEON/kernels/NEChannelCombineKernel.h" -#include "src/core/NEON/kernels/NEChannelExtractKernel.h" #include "src/core/NEON/kernels/NEChannelShuffleLayerKernel.h" #include "src/core/NEON/kernels/NECol2ImKernel.h" -#include "src/core/NEON/kernels/NEColorConvertKernel.h" #include "src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h" #include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h" #include "src/core/NEON/kernels/NEConvolutionKernel.h" @@ -50,15 +42,11 @@ #include "src/core/NEON/kernels/NEDepthToSpaceLayerKernel.h" #include "src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h" #include "src/core/NEON/kernels/NEDequantizationLayerKernel.h" -#include "src/core/NEON/kernels/NEDerivativeKernel.h" -#include "src/core/NEON/kernels/NEDilateKernel.h" #include "src/core/NEON/kernels/NEDirectConvolutionLayerKernel.h" #include "src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h" -#include "src/core/NEON/kernels/NEErodeKernel.h" #include "src/core/NEON/kernels/NEFFTDigitReverseKernel.h" #include "src/core/NEON/kernels/NEFFTRadixStageKernel.h" #include "src/core/NEON/kernels/NEFFTScaleKernel.h" -#include "src/core/NEON/kernels/NEFastCornersKernel.h" #include "src/core/NEON/kernels/NEFillArrayKernel.h" #include "src/core/NEON/kernels/NEFillBorderKernel.h" #include "src/core/NEON/kernels/NEFuseBatchNormalizationKernel.h" @@ -75,28 +63,15 @@ #include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" #include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" #include "src/core/NEON/kernels/NEGatherKernel.h" -#include "src/core/NEON/kernels/NEGaussian3x3Kernel.h" -#include "src/core/NEON/kernels/NEGaussian5x5Kernel.h" -#include "src/core/NEON/kernels/NEGaussianPyramidKernel.h" #include "src/core/NEON/kernels/NEGenerateProposalsLayerKernel.h" -#include "src/core/NEON/kernels/NEHOGDescriptorKernel.h" -#include "src/core/NEON/kernels/NEHOGDetectorKernel.h" -#include "src/core/NEON/kernels/NEHarrisCornersKernel.h" -#include "src/core/NEON/kernels/NEHistogramKernel.h" #include "src/core/NEON/kernels/NEIm2ColKernel.h" #include "src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h" -#include "src/core/NEON/kernels/NEIntegralImageKernel.h" #include "src/core/NEON/kernels/NEL2NormalizeLayerKernel.h" #include "src/core/NEON/kernels/NELKTrackerKernel.h" #include "src/core/NEON/kernels/NELogicalKernel.h" -#include "src/core/NEON/kernels/NEMagnitudePhaseKernel.h" #include "src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h" -#include "src/core/NEON/kernels/NEMeanStdDevKernel.h" #include "src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.h" -#include "src/core/NEON/kernels/NEMedian3x3Kernel.h" #include "src/core/NEON/kernels/NEMinMaxLayerKernel.h" -#include "src/core/NEON/kernels/NEMinMaxLocationKernel.h" -#include "src/core/NEON/kernels/NENonLinearFilterKernel.h" #include "src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h" #include "src/core/NEON/kernels/NENormalizationLayerKernel.h" #include "src/core/NEON/kernels/NEPadLayerKernel.h" @@ -108,24 +83,16 @@ #include "src/core/NEON/kernels/NEROIPoolingLayerKernel.h" #include "src/core/NEON/kernels/NERangeKernel.h" #include "src/core/NEON/kernels/NEReductionOperationKernel.h" -#include "src/core/NEON/kernels/NERemapKernel.h" #include "src/core/NEON/kernels/NEReorgLayerKernel.h" #include "src/core/NEON/kernels/NEReverseKernel.h" #include "src/core/NEON/kernels/NEScaleKernel.h" -#include "src/core/NEON/kernels/NEScharr3x3Kernel.h" #include "src/core/NEON/kernels/NESelectKernel.h" -#include "src/core/NEON/kernels/NESobel3x3Kernel.h" -#include "src/core/NEON/kernels/NESobel5x5Kernel.h" -#include "src/core/NEON/kernels/NESobel7x7Kernel.h" #include "src/core/NEON/kernels/NESpaceToBatchLayerKernel.h" #include "src/core/NEON/kernels/NESpaceToDepthLayerKernel.h" #include "src/core/NEON/kernels/NEStackLayerKernel.h" #include "src/core/NEON/kernels/NEStridedSliceKernel.h" -#include "src/core/NEON/kernels/NETableLookupKernel.h" -#include "src/core/NEON/kernels/NEThresholdKernel.h" #include "src/core/NEON/kernels/NETileKernel.h" #include "src/core/NEON/kernels/NETransposeKernel.h" -#include "src/core/NEON/kernels/NEWarpKernel.h" #include "src/core/NEON/kernels/NEWeightsReshapeKernel.h" #include "src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h" diff --git a/src/core/NEON/kernels/NEAbsoluteDifferenceKernel.cpp b/src/core/NEON/kernels/NEAbsoluteDifferenceKernel.cpp deleted file mode 100644 index a6a41b8af9..0000000000 --- a/src/core/NEON/kernels/NEAbsoluteDifferenceKernel.cpp +++ /dev/null @@ -1,209 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/NEON/kernels/NEAbsoluteDifferenceKernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/IAccessWindow.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" - -#include - -namespace arm_compute -{ -namespace -{ -void abs_diff_U8_U8_U8(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window) -{ - Iterator input1(in1, window); - Iterator input2(in2, window); - Iterator output(out, window); - - execute_window_loop(window, [&](const Coordinates &) - { - const uint8x16_t input1_val = vld1q_u8(input1.ptr()); - const uint8x16_t input2_val = vld1q_u8(input2.ptr()); - - vst1q_u8(output.ptr(), vabdq_u8(input1_val, input2_val)); - }, - input1, input2, output); -} - -inline int16x8x2_t vqabd2q_s16(const int16x8x2_t &v1, const int16x8x2_t &v2) -{ - const int16x8x2_t res = - { - { - vqabsq_s16(vqsubq_s16(v1.val[0], v2.val[0])), - vqabsq_s16(vqsubq_s16(v1.val[1], v2.val[1])) - } - }; - - return res; -} - -void abs_diff_S16_S16_S16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window) -{ - Iterator input1(in1, window); - Iterator input2(in2, window); - Iterator output(out, window); - - execute_window_loop(window, [&](const Coordinates &) - { - int16x8x2_t input1_val = vld2q_s16(reinterpret_cast(input1.ptr())); - int16x8x2_t input2_val = vld2q_s16(reinterpret_cast(input2.ptr())); - vst2q_s16(reinterpret_cast(output.ptr()), vqabd2q_s16(input1_val, input2_val)); - }, - input1, input2, output); -} - -void abs_diff_U8_S16_S16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window) -{ - Iterator input1(in1, window); - Iterator input2(in2, window); - Iterator output(out, window); - - execute_window_loop(window, [&](const Coordinates &) - { - const uint8x16_t input1_val = vld1q_u8(input1.ptr()); - const int16x8x2_t input2_val = - { - { - vld1q_s16(reinterpret_cast(input2.ptr())), - vld1q_s16(reinterpret_cast(input2.ptr()) + 8) - } - }; - - const int16x8x2_t out_val = - { - { - vqabsq_s16(vqsubq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(input1_val))), input2_val.val[0])), - vqabsq_s16(vqsubq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(input1_val))), input2_val.val[1])) - } - }; - - vst1q_s16(reinterpret_cast(output.ptr()), out_val.val[0]); - vst1q_s16(reinterpret_cast(output.ptr()) + 8, out_val.val[1]); - - }, - input1, input2, output); -} - -void abs_diff_S16_U8_S16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window) -{ - abs_diff_U8_S16_S16(in2, in1, out, window); -} -} // namespace - -NEAbsoluteDifferenceKernel::NEAbsoluteDifferenceKernel() - : _func(nullptr), _input1(nullptr), _input2(nullptr), _output(nullptr) -{ -} - -void NEAbsoluteDifferenceKernel::configure(const ITensor *input1, const ITensor *input2, ITensor *output) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output); - - set_shape_if_empty(*output->info(), input1->info()->tensor_shape()); - - if(input1->info()->data_type() == DataType::S16 || input2->info()->data_type() == DataType::S16) - { - set_format_if_unknown(*output->info(), Format::S16); - } - else if(input1->info()->data_type() == DataType::U8 || input2->info()->data_type() == DataType::U8) - { - set_format_if_unknown(*output->info(), Format::U8); - } - - ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input1, input2, output); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::S16); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::S16); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16); - ARM_COMPUTE_ERROR_ON_MSG(output->info()->data_type() == DataType::U8 && (input1->info()->data_type() != DataType::U8 || input2->info()->data_type() != DataType::U8), - "The output image can only be U8 if both input images are U8"); - - _input1 = input1; - _input2 = input2; - _output = output; - - const DataType input1_data_type = input1->info()->data_type(); - const DataType input2_data_type = input2->info()->data_type(); - - if(input1_data_type == input2_data_type) - { - if(input1_data_type == DataType::U8) - { - _func = &abs_diff_U8_U8_U8; - } - else - { - _func = &abs_diff_S16_S16_S16; - } - } - else - { - if(input1_data_type == DataType::U8) - { - _func = &abs_diff_U8_S16_S16; - } - else - { - _func = &abs_diff_S16_U8_S16; - } - } - - constexpr unsigned int num_elems_processed_per_iteration = 16; - - // Configure kernel window - Window win = calculate_max_window(*input1->info(), Steps(num_elems_processed_per_iteration)); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - - update_window_and_padding(win, - AccessWindowHorizontal(input1->info(), 0, num_elems_processed_per_iteration), - AccessWindowHorizontal(input2->info(), 0, num_elems_processed_per_iteration), - output_access); - - ValidRegion valid_region = intersect_valid_regions(input1->info()->valid_region(), - input2->info()->valid_region()); - - output_access.set_valid_region(win, valid_region); - - INEKernel::configure(win); -} - -void NEAbsoluteDifferenceKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - ARM_COMPUTE_ERROR_ON(_func == nullptr); - - _func(_input1, _input2, _output, window); -} -} // namespace arm_compute diff --git a/src/core/NEON/kernels/NEAbsoluteDifferenceKernel.h b/src/core/NEON/kernels/NEAbsoluteDifferenceKernel.h deleted file mode 100644 index cc95172f35..0000000000 --- a/src/core/NEON/kernels/NEAbsoluteDifferenceKernel.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEABSOLUTEDIFFERENCEKERNEL_H -#define ARM_COMPUTE_NEABSOLUTEDIFFERENCEKERNEL_H - -#include "src/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the absolute difference kernel - * - * Absolute difference is computed by: - * @f[ output(x,y) = | input1(x,y) - input2(x,y) | @f] - */ -class NEAbsoluteDifferenceKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEAbsoluteDifferenceKernel"; - } - /** Default constructor */ - NEAbsoluteDifferenceKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEAbsoluteDifferenceKernel(const NEAbsoluteDifferenceKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEAbsoluteDifferenceKernel &operator=(const NEAbsoluteDifferenceKernel &) = delete; - /** Allow instances of this class to be moved */ - NEAbsoluteDifferenceKernel(NEAbsoluteDifferenceKernel &&) = default; - /** Allow instances of this class to be moved */ - NEAbsoluteDifferenceKernel &operator=(NEAbsoluteDifferenceKernel &&) = default; - /** Default destructor */ - ~NEAbsoluteDifferenceKernel() = default; - - /** Set the inputs and output tensors - * - * @param[in] input1 Source tensor. Data types supported: U8/S16 - * @param[in] input2 Source tensor. Data types supported: U8/S16 - * @param[out] output Destination tensor, Data types supported: U8/S16 - */ - void configure(const ITensor *input1, const ITensor *input2, ITensor *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Common signature for all the specialised absolute difference functions - * - * @param[in] input1 An input tensor. Data types supported: U8/S16. - * @param[in] input2 An input tensor. Data types supported: U8/S16. - * @param[out] output The output tensor, Data types supported: U8 (Only if both inputs are U8), S16. - * @param[in] window Region on which to execute the kernel. - */ - using AbsDiffFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window); - - /** Absolute difference function to use for the particular tensor formats passed to configure() */ - AbsDiffFunction *_func; - const ITensor *_input1; - const ITensor *_input2; - ITensor *_output; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEABSOLUTEDIFFERENCEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEAccumulateKernel.cpp b/src/core/NEON/kernels/NEAccumulateKernel.cpp deleted file mode 100644 index 46179cadcb..0000000000 --- a/src/core/NEON/kernels/NEAccumulateKernel.cpp +++ /dev/null @@ -1,359 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/NEON/kernels/NEAccumulateKernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/IAccessWindow.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" - -#include - -namespace arm_compute -{ -/* Max S16 value used for saturation purposes. */ -const static uint16x8_t max_int_u16 = vdupq_n_u16(static_cast(INT16_MAX)); - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -namespace fp16 -{ -inline float16x8x2_t convert_u8x16_to_f16x8x2(uint8x16_t input) -{ - const float16x8x2_t out = - { - { - vcvtq_f16_u16(vmovl_u8(vget_low_u8(input))), - vcvtq_f16_u16(vmovl_u8(vget_high_u8(input))) - } - }; - - return out; -} - -inline uint8x16_t convert_f16x8x2_to_u8x16(const float16x8x2_t &input) -{ - return vcombine_u8(vmovn_u16(vcvtq_u16_f16(input.val[0])), - vmovn_u16(vcvtq_u16_f16(input.val[1]))); -} - -inline float16x8x2_t vector_accumulate_weighted(const float16x8x2_t &vec0, const float16x8x2_t &vec1, float16x8_t scale_val, float16x8_t scale_val2) -{ - const float16x8x2_t res = - { - { - vfmaq_f16(vmulq_f16(vec1.val[0], scale_val), vec0.val[0], scale_val2), - vfmaq_f16(vmulq_f16(vec1.val[1], scale_val), vec0.val[1], scale_val2) - } - }; - - return res; -} - -void acc_we_v16_u8(const void *__restrict input, void *__restrict accum, float16x8_t scale_val, float16x8_t scale_val2) -{ - ARM_COMPUTE_ERROR_ON(nullptr == input); - ARM_COMPUTE_ERROR_ON(nullptr == accum); - - const auto input_ptr = static_cast(input); - const auto accum_ptr = static_cast(accum); - - const uint8x16x4_t input_buffer = vld4q_u8(input_ptr); - uint8x16x4_t accum_buffer = vld4q_u8(accum_ptr); - - const float16x8x2_t f16_input_0 = convert_u8x16_to_f16x8x2(input_buffer.val[0]); - const float16x8x2_t f16_input_1 = convert_u8x16_to_f16x8x2(input_buffer.val[1]); - const float16x8x2_t f16_input_2 = convert_u8x16_to_f16x8x2(input_buffer.val[2]); - const float16x8x2_t f16_input_3 = convert_u8x16_to_f16x8x2(input_buffer.val[3]); - - float16x8x2_t f16_accum_0 = convert_u8x16_to_f16x8x2(accum_buffer.val[0]); - float16x8x2_t f16_accum_1 = convert_u8x16_to_f16x8x2(accum_buffer.val[1]); - float16x8x2_t f16_accum_2 = convert_u8x16_to_f16x8x2(accum_buffer.val[2]); - float16x8x2_t f16_accum_3 = convert_u8x16_to_f16x8x2(accum_buffer.val[3]); - - f16_accum_0 = vector_accumulate_weighted(f16_input_0, f16_accum_0, scale_val, scale_val2); - f16_accum_1 = vector_accumulate_weighted(f16_input_1, f16_accum_1, scale_val, scale_val2); - f16_accum_2 = vector_accumulate_weighted(f16_input_2, f16_accum_2, scale_val, scale_val2); - f16_accum_3 = vector_accumulate_weighted(f16_input_3, f16_accum_3, scale_val, scale_val2); - - accum_buffer = { { - convert_f16x8x2_to_u8x16(f16_accum_0), - convert_f16x8x2_to_u8x16(f16_accum_1), - convert_f16x8x2_to_u8x16(f16_accum_2), - convert_f16x8x2_to_u8x16(f16_accum_3) - } - }; - - vst4q_u8(accum_ptr, accum_buffer); -} -} // namespace fp16 - -void NEAccumulateWeightedFP16Kernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INESimpleKernel::window(), window); - - Iterator input(_input, window); - Iterator accum(_output, window); - - const float16x8_t scale_val = vdupq_n_f16(1.f - _alpha); - const float16x8_t scale_val2 = vdupq_n_f16(_alpha); - - execute_window_loop(window, [&](const Coordinates &) - { - fp16::acc_we_v16_u8(input.ptr(), accum.ptr(), scale_val, scale_val2); - }, - input, accum); -} -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ - -namespace -{ -inline void acc_v16_u8(const void *__restrict input, void *__restrict accum) -{ - ARM_COMPUTE_ERROR_ON(nullptr == input); - ARM_COMPUTE_ERROR_ON(nullptr == accum); - - const auto in = static_cast(input); - const auto out = static_cast(accum); - - uint8x16_t ta1 = vld1q_u8(in); - int16x8_t ta2 = vld1q_s16(out); - int16x8_t ta3 = vld1q_s16(out + 8); - - ta2 = vqaddq_s16(ta2, vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(ta1)))); - ta3 = vqaddq_s16(ta3, vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(ta1)))); - - vst1q_s16(out, ta2); - vst1q_s16(out + 8, ta3); -} - -inline float32x4x4_t convert_u8x16_to_f32x4x4(uint8x16_t input) -{ - const uint16x8_t u16_output_low = vmovl_u8(vget_low_u8(input)); - const uint16x8_t u16_output_hi = vmovl_u8(vget_high_u8(input)); - - const float32x4x4_t res = - { - { - vcvtq_f32_u32(vmovl_u16(vget_low_u16(u16_output_low))), - vcvtq_f32_u32(vmovl_u16(vget_high_u16(u16_output_low))), - vcvtq_f32_u32(vmovl_u16(vget_low_u16(u16_output_hi))), - vcvtq_f32_u32(vmovl_u16(vget_high_u16(u16_output_hi))) - } - }; - - return res; -} - -inline uint8x16_t convert_f32x4x4_to_u8x16(const float32x4x4_t &input) -{ - return vcombine_u8(vmovn_u16(vcombine_u16(vmovn_u32(vcvtq_u32_f32(input.val[0])), - vmovn_u32(vcvtq_u32_f32(input.val[1])))), - vmovn_u16(vcombine_u16(vmovn_u32(vcvtq_u32_f32(input.val[2])), - vmovn_u32(vcvtq_u32_f32(input.val[3]))))); -} - -inline float32x4x4_t vector_accumulate_weighted(const float32x4x4_t &vector_input, float32x4x4_t vector_output, float32x4_t scale_val, float32x4_t scale_val2) -{ - vector_output.val[0] = vmulq_f32(vector_output.val[0], scale_val); - vector_output.val[1] = vmulq_f32(vector_output.val[1], scale_val); - vector_output.val[2] = vmulq_f32(vector_output.val[2], scale_val); - vector_output.val[3] = vmulq_f32(vector_output.val[3], scale_val); - - vector_output.val[0] = vmlaq_f32(vector_output.val[0], vector_input.val[0], scale_val2); - vector_output.val[1] = vmlaq_f32(vector_output.val[1], vector_input.val[1], scale_val2); - vector_output.val[2] = vmlaq_f32(vector_output.val[2], vector_input.val[2], scale_val2); - vector_output.val[3] = vmlaq_f32(vector_output.val[3], vector_input.val[3], scale_val2); - - return vector_output; -} - -inline void acc_we_v16_u8(const void *__restrict input, void *__restrict accum, const float32x4_t scale_val, const float32x4_t scale_val2) -{ - ARM_COMPUTE_ERROR_ON(nullptr == input); - ARM_COMPUTE_ERROR_ON(nullptr == accum); - - const auto input_ptr = static_cast(input); - const auto accum_ptr = static_cast(accum); - - const uint8x16_t input_buffer = vld1q_u8(input_ptr); - const uint8x16_t accum_buffer = vld1q_u8(accum_ptr); - - const float32x4x4_t f32_input_0 = convert_u8x16_to_f32x4x4(input_buffer); - const float32x4x4_t f32_output_0 = convert_u8x16_to_f32x4x4(accum_buffer); - - const float32x4x4_t f32_res_0 = vector_accumulate_weighted(f32_input_0, f32_output_0, scale_val, scale_val2); - - vst1q_u8(accum_ptr, convert_f32x4x4_to_u8x16(f32_res_0)); -} - -void acc_sq_v16_u8(const void *__restrict input, uint32_t shift, void *__restrict accum) -{ - ARM_COMPUTE_ERROR_ON(nullptr == input); - ARM_COMPUTE_ERROR_ON(nullptr == accum); - ARM_COMPUTE_ERROR_ON(shift > 15); - - const auto input_buffer = static_cast(input); - const auto accum_buffer = static_cast(accum); - - const uint8x16_t ta1 = vld1q_u8(input_buffer); - uint16x8_t ta2 = vreinterpretq_u16_s16(vld1q_s16(accum_buffer)); - uint16x8_t ta3 = vreinterpretq_u16_s16(vld1q_s16(accum_buffer + 8)); - - const int16x8_t vector_shift = vdupq_n_s16(-static_cast(shift)); - - uint16x8_t linput = vmovl_u8(vget_low_u8(ta1)); - uint16x8_t hinput = vmovl_u8(vget_high_u8(ta1)); - - linput = vmulq_u16(linput, linput); - hinput = vmulq_u16(hinput, hinput); - - linput = vqshlq_u16(linput, vector_shift); - hinput = vqshlq_u16(hinput, vector_shift); - - ta2 = vqaddq_u16(ta2, linput); - ta3 = vqaddq_u16(ta3, hinput); - - vst1q_s16(accum_buffer, vreinterpretq_s16_u16(vminq_u16(max_int_u16, ta2))); - vst1q_s16(accum_buffer + 8, vreinterpretq_s16_u16(vminq_u16(max_int_u16, ta3))); -} -} // namespace - -void NEAccumulateKernel::configure(const ITensor *input, ITensor *accum) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, accum); - - set_shape_if_empty(*accum->info(), input->info()->tensor_shape()); - - set_format_if_unknown(*accum->info(), Format::S16); - - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(accum, 1, DataType::S16); - ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, accum); - - constexpr unsigned int num_elems_processed_per_iteration = 16; - INESimpleKernel::configure(input, accum, num_elems_processed_per_iteration); -} - -void NEAccumulateKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INESimpleKernel::window(), window); - Iterator input(_input, window); - Iterator accum(_output, window); - - execute_window_loop(window, [&](const Coordinates &) - { - acc_v16_u8(input.ptr(), accum.ptr()); - }, - input, accum); -} - -NEAccumulateWeightedKernel::NEAccumulateWeightedKernel() - : _alpha(0.0f) -{ -} - -void NEAccumulateWeightedKernel::configure(const ITensor *input, float alpha, ITensor *accum) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, accum); - - set_shape_if_empty(*accum->info(), input->info()->tensor_shape()); - - set_format_if_unknown(*accum->info(), Format::U8); - - ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, accum); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(accum, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON(alpha < 0.0 || alpha > 1.0); - - _alpha = alpha; - - constexpr unsigned int num_elems_processed_per_iteration = 16; - INESimpleKernel::configure(input, accum, num_elems_processed_per_iteration); -} - -void NEAccumulateWeightedKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INESimpleKernel::window(), window); - - Iterator input(_input, window); - Iterator accum(_output, window); - - const float32x4_t scale_val = vdupq_n_f32(1.f - _alpha); - const float32x4_t scale_val2 = vdupq_n_f32(_alpha); - - execute_window_loop(window, [&](const Coordinates &) - { - acc_we_v16_u8(input.ptr(), accum.ptr(), scale_val, scale_val2); - }, - input, accum); -} - -NEAccumulateSquaredKernel::NEAccumulateSquaredKernel() - : _shift(0) -{ -} - -void NEAccumulateSquaredKernel::configure(const ITensor *input, uint32_t shift, ITensor *accum) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, accum); - - set_shape_if_empty(*accum->info(), input->info()->tensor_shape()); - - set_format_if_unknown(*accum->info(), Format::S16); - - ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, accum); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(accum, 1, DataType::S16); - ARM_COMPUTE_ERROR_ON(shift > 15); - - _shift = shift; - - constexpr unsigned int num_elems_processed_per_iteration = 16; - INESimpleKernel::configure(input, accum, num_elems_processed_per_iteration); -} - -void NEAccumulateSquaredKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INESimpleKernel::window(), window); - Iterator input(_input, window); - Iterator accum(_output, window); - - execute_window_loop(window, [&](const Coordinates &) - { - acc_sq_v16_u8(input.ptr(), _shift, accum.ptr()); - }, - input, accum); -} -} // namespace arm_compute \ No newline at end of file diff --git a/src/core/NEON/kernels/NEAccumulateKernel.h b/src/core/NEON/kernels/NEAccumulateKernel.h deleted file mode 100644 index af1298f53f..0000000000 --- a/src/core/NEON/kernels/NEAccumulateKernel.h +++ /dev/null @@ -1,183 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEACCUMULATEKERNEL_H -#define ARM_COMPUTE_NEACCUMULATEKERNEL_H - -#include "src/core/NEON/INESimpleKernel.h" - -#include - -namespace arm_compute -{ -class ITensor; - -/** Interface for the accumulate kernel - * - * Accumulation is computed by: - * @f[ accum(x,y) = accum(x,y) + input(x,y) @f] - */ -class NEAccumulateKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEAccumulateKernel"; - } - /** Default constructor */ - NEAccumulateKernel() = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEAccumulateKernel(const NEAccumulateKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEAccumulateKernel &operator=(const NEAccumulateKernel &) = delete; - /** Allow instances of this class to be moved */ - NEAccumulateKernel(NEAccumulateKernel &&) = default; - /** Allow instances of this class to be moved */ - NEAccumulateKernel &operator=(NEAccumulateKernel &&) = default; - /** Default destructor */ - ~NEAccumulateKernel() = default; - /** Set the input and accumulation tensors - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] accum Destination tensor. Data type supported: S16. - */ - void configure(const ITensor *input, ITensor *accum); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; -}; - -/** Interface for the accumulate weighted kernel - * - * Weighted accumulation is computed: - * @f[ accum(x,y) = (1 - \alpha)*accum(x,y) + \alpha*input(x,y) @f] - * - * Where @f$ 0 \le \alpha \le 1 @f$ - * Conceptually, the rounding for this is defined as: - * @f[ output(x,y)= uint8( (1 - \alpha) * float32( int32( output(x,y) ) ) + \alpha * float32( int32( input(x,y) ) ) ) @f] -*/ -class NEAccumulateWeightedKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEAccumulateWeightedKernel"; - } - /** Default constructor */ - NEAccumulateWeightedKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEAccumulateWeightedKernel(const NEAccumulateWeightedKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEAccumulateWeightedKernel &operator=(const NEAccumulateWeightedKernel &) = delete; - /** Allow instances of this class to be moved */ - NEAccumulateWeightedKernel(NEAccumulateWeightedKernel &&) = default; - /** Allow instances of this class to be moved */ - NEAccumulateWeightedKernel &operator=(NEAccumulateWeightedKernel &&) = default; - /** Default destructor */ - ~NEAccumulateWeightedKernel() = default; - /** Set the input and accumulation tensors, and the scale value - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[in] alpha Scalar value in the range [0.0f, 1.0f] - * @param[in,out] accum Accumulated tensor. Data type supported: U8. - */ - void configure(const ITensor *input, float alpha, ITensor *accum); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -protected: - float _alpha; -}; - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -/** Interface for the accumulate weighted kernel using F16 */ -class NEAccumulateWeightedFP16Kernel : public NEAccumulateWeightedKernel -{ -public: - const char *name() const override - { - return "NEAccumulateWeightedFP16Kernel"; - } - /** Default constructor */ - NEAccumulateWeightedFP16Kernel() = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEAccumulateWeightedFP16Kernel(const NEAccumulateWeightedFP16Kernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEAccumulateWeightedFP16Kernel &operator=(const NEAccumulateWeightedFP16Kernel &) = delete; - /** Allow instances of this class to be moved */ - NEAccumulateWeightedFP16Kernel(NEAccumulateWeightedFP16Kernel &&) = default; - /** Allow instances of this class to be moved */ - NEAccumulateWeightedFP16Kernel &operator=(NEAccumulateWeightedFP16Kernel &&) = default; - /** Default destructor */ - ~NEAccumulateWeightedFP16Kernel() = default; - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; -}; -#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ -/** Interface for the accumulate weighted kernel using F16 */ -using NEAccumulateWeightedFP16Kernel = NEAccumulateWeightedKernel; -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ - -/** Interface for the accumulate squared kernel - * - * The accumulation of squares is computed: - * @f[ accum(x,y) = saturate_{int16} ( (uint16) accum(x,y) + (((uint16)(input(x,y)^2)) >> (shift)) ) @f] - * - * Where @f$ 0 \le shift \le 15 @f$ -*/ -class NEAccumulateSquaredKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEAccumulateSquaredKernel"; - } - /** Default constructor */ - NEAccumulateSquaredKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEAccumulateSquaredKernel(const NEAccumulateSquaredKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEAccumulateSquaredKernel &operator=(const NEAccumulateSquaredKernel &) = delete; - /** Allow instances of this class to be moved */ - NEAccumulateSquaredKernel(NEAccumulateSquaredKernel &&) = default; - /** Allow instances of this class to be moved */ - NEAccumulateSquaredKernel &operator=(NEAccumulateSquaredKernel &&) = default; - /** Default destructor */ - ~NEAccumulateSquaredKernel() = default; - /** Set the input and accumulation tensors and the shift value. - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[in] shift Shift value in the range of [0, 15] - * @param[in,out] accum Accumulated tensor. Data type supported: S16. - */ - void configure(const ITensor *input, uint32_t shift, ITensor *accum); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - uint32_t _shift; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEACCUMULATEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEBox3x3Kernel.cpp b/src/core/NEON/kernels/NEBox3x3Kernel.cpp deleted file mode 100644 index 2aa8aa8e99..0000000000 --- a/src/core/NEON/kernels/NEBox3x3Kernel.cpp +++ /dev/null @@ -1,194 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/NEON/kernels/NEBox3x3Kernel.h" - -#include "arm_compute/core/Coordinates.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/IAccessWindow.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/Validate.h" -#include "src/core/NEON/INEKernel.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" - -#include - -using namespace arm_compute; - -int16x8_t calculate_kernel(const uint8x16_t &top_data, const uint8x16_t &mid_data, const uint8x16_t &bot_data) -{ - const int16x8x2_t top_s16 = - { - { - vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(top_data))), - vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(top_data))) - } - }; - const int16x8x2_t mid_s16 = - { - { - vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(mid_data))), - vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(mid_data))) - } - }; - const int16x8x2_t bot_s16 = - { - { - vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(bot_data))), - vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(bot_data))) - } - }; - - //top left - int16x8_t out = top_s16.val[0]; - //top mid - out = vaddq_s16(out, vextq_s16(top_s16.val[0], top_s16.val[1], 1)); - //top right - out = vaddq_s16(out, vextq_s16(top_s16.val[0], top_s16.val[1], 2)); - //mid left - out = vaddq_s16(out, mid_s16.val[0]); - //mid mid - out = vaddq_s16(out, vextq_s16(mid_s16.val[0], mid_s16.val[1], 1)); - //mid right - out = vaddq_s16(out, vextq_s16(mid_s16.val[0], mid_s16.val[1], 2)); - //bot left - out = vaddq_s16(out, bot_s16.val[0]); - //bot mid - out = vaddq_s16(out, vextq_s16(bot_s16.val[0], bot_s16.val[1], 1)); - //bot right - out = vaddq_s16(out, vextq_s16(bot_s16.val[0], bot_s16.val[1], 2)); - return out; -} - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -void NEBox3x3FP16Kernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INESimpleKernel::window(), window); - - Iterator input(_input, window); - Iterator output(_output, window); - - unsigned char *const input_top_ptr = _input->ptr_to_element(Coordinates(-1, -1)); - unsigned char *const input_mid_ptr = _input->ptr_to_element(Coordinates(-1, 0)); - unsigned char *const input_bot_ptr = _input->ptr_to_element(Coordinates(-1, +1)); - - const float16x8_t oneovernine = vdupq_n_f16(1.0f / 9.0f); - - execute_window_loop(window, [&](const Coordinates &) - { - const uint8x16_t top_data = vld1q_u8(input_top_ptr + input.offset()); - const uint8x16_t mid_data = vld1q_u8(input_mid_ptr + input.offset()); - const uint8x16_t bot_data = vld1q_u8(input_bot_ptr + input.offset()); - - int16x8_t out = calculate_kernel(top_data, mid_data, bot_data); - - float16x8_t outfloat = vcvtq_f16_s16(out); - outfloat = vmulq_f16(outfloat, oneovernine); - - vst1_u8(output.ptr(), vqmovun_s16(vcvtq_s16_f16(outfloat))); - }, - input, output); -} -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ - -BorderSize NEBox3x3Kernel::border_size() const -{ - return BorderSize(1); -} - -void NEBox3x3Kernel::configure(const ITensor *input, ITensor *output, bool border_undefined) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - - set_shape_if_empty(*output->info(), input->info()->tensor_shape()); - - set_format_if_unknown(*input->info(), Format::U8); - set_format_if_unknown(*output->info(), Format::U8); - - ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - - _input = input; - _output = output; - - constexpr unsigned int num_elems_processed_per_iteration = 8; - constexpr unsigned int num_elems_read_per_iteration = 16; - constexpr unsigned int num_elems_written_per_iteration = 8; - constexpr unsigned int num_rows_read_per_iteration = 3; - constexpr int rect_offset_xy = -1; - - // Configure kernel window - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size()); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration); - - update_window_and_padding(win, AccessWindowRectangle(input->info(), rect_offset_xy, rect_offset_xy, num_elems_read_per_iteration, num_rows_read_per_iteration), output_access); - - output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - - INEKernel::configure(win); -} - -void NEBox3x3Kernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INESimpleKernel::window(), window); - - Iterator input(_input, window); - Iterator output(_output, window); - - unsigned char *const input_top_ptr = _input->ptr_to_element(Coordinates(-1, -1)); - unsigned char *const input_mid_ptr = _input->ptr_to_element(Coordinates(-1, 0)); - unsigned char *const input_bot_ptr = _input->ptr_to_element(Coordinates(-1, +1)); - - const int shift = 19; - int value = (1 << shift) / 9 + 1; //58255 / (2^19) ~= 1/9 - const int32x4_t oneovernine = vdupq_n_s32(value); - - execute_window_loop(window, [&](const Coordinates &) - { - const uint8x16_t top_data = vld1q_u8(input_top_ptr + input.offset()); - const uint8x16_t mid_data = vld1q_u8(input_mid_ptr + input.offset()); - const uint8x16_t bot_data = vld1q_u8(input_bot_ptr + input.offset()); - - int16x8_t out = calculate_kernel(top_data, mid_data, bot_data); - - int32x4_t outfloathigh = vmovl_s16(vget_high_s16(out)); - int32x4_t outfloatlow = vmovl_s16(vget_low_s16(out)); - - outfloathigh = vmulq_s32(outfloathigh, oneovernine); - outfloatlow = vmulq_s32(outfloatlow, oneovernine); - outfloathigh = vshrq_n_s32(outfloathigh, shift); - outfloatlow = vshrq_n_s32(outfloatlow, shift); - out = vcombine_s16(vqmovn_s32((outfloatlow)), - vqmovn_s32((outfloathigh))); - - vst1_u8(output.ptr(), vqmovun_s16(out)); - }, - input, output); -} diff --git a/src/core/NEON/kernels/NEBox3x3Kernel.h b/src/core/NEON/kernels/NEBox3x3Kernel.h deleted file mode 100644 index 4f9ac18219..0000000000 --- a/src/core/NEON/kernels/NEBox3x3Kernel.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) 2016-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEBOX3x3KERNEL_H -#define ARM_COMPUTE_NEBOX3x3KERNEL_H - -#include "src/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Neon kernel to perform a Box 3x3 filter */ -class NEBox3x3Kernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEBox3x3Kernel"; - } - /** Default constructor */ - NEBox3x3Kernel() = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBox3x3Kernel(const NEBox3x3Kernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBox3x3Kernel &operator=(const NEBox3x3Kernel &) = delete; - /** Allow instances of this class to be moved */ - NEBox3x3Kernel(NEBox3x3Kernel &&) = default; - /** Allow instances of this class to be moved */ - NEBox3x3Kernel &operator=(NEBox3x3Kernel &&) = default; - /** Default destructor */ - ~NEBox3x3Kernel() = default; - /** Set the source, destination and border mode of the kernel - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] output Destination tensor. Data type supported: U8. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input, ITensor *output, bool border_undefined); - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; -}; - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -/** Neon kernel to perform a Box 3x3 filter for FP16 datatype - */ -class NEBox3x3FP16Kernel : public NEBox3x3Kernel -{ -public: - const char *name() const override - { - return "NEBox3x3FP16Kernel"; - } - /** Default constructor */ - NEBox3x3FP16Kernel() = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBox3x3FP16Kernel(const NEBox3x3FP16Kernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBox3x3FP16Kernel &operator=(const NEBox3x3FP16Kernel &) = delete; - /** Allow instances of this class to be moved */ - NEBox3x3FP16Kernel(NEBox3x3FP16Kernel &&) = default; - /** Allow instances of this class to be moved */ - NEBox3x3FP16Kernel &operator=(NEBox3x3FP16Kernel &&) = default; - /** Default destructor */ - ~NEBox3x3FP16Kernel() = default; - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; -}; -#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ -/** Neon kernel to perform a Box 3x3 filter for FP16 datatype */ -using NEBox3x3FP16Kernel = NEBox3x3Kernel; -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEBOX3x3KERNEL_H */ diff --git a/src/core/NEON/kernels/NECannyEdgeKernel.cpp b/src/core/NEON/kernels/NECannyEdgeKernel.cpp deleted file mode 100644 index 7a2bf20c04..0000000000 --- a/src/core/NEON/kernels/NECannyEdgeKernel.cpp +++ /dev/null @@ -1,1122 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/NEON/kernels/NECannyEdgeKernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" -#include "src/core/AccessWindowStatic.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" -#include "src/core/helpers/WindowHelpers.h" - -#include -#include -#include -#include - -namespace arm_compute -{ -namespace -{ -constexpr int NO_EDGE = 0; -constexpr int EDGE = 255; -constexpr int MAYBE = 127; - -inline uint8x8_t phase_quantization(const float32x4x2_t &gx, const float32x4x2_t &gy) -{ - // Constant use for evaluating score1 and score3 - static const float32x4_t const45 = vdupq_n_f32(0.70710678118655f); - static const float32x4_t zero = vdupq_n_f32(0.0f); - static const float32x4_t one = vdupq_n_f32(1.0f); - static const float32x4_t two = vdupq_n_f32(2.0f); - static const float32x4_t three = vdupq_n_f32(3.0f); - - // Score0: (1, 0) - const float32x4x2_t score0 = - { - { - vabsq_f32(gx.val[0]), - vabsq_f32(gx.val[1]) - } - }; - - // Score2: ( 0, 1 ) - const float32x4x2_t score2 = - { - { - vabsq_f32(gy.val[0]), - vabsq_f32(gy.val[1]) - } - }; - - // Score1 and Score3: ( sqrt(2) / 2, sqrt(2) / 2 ) - ( -sqrt(2) / 2, sqrt(2) / 2 ) - float32x4x2_t score1 = - { - { - vmulq_f32(gy.val[0], const45), - vmulq_f32(gy.val[1], const45) - } - }; - - float32x4x2_t score3 = score1; - - score1.val[0] = vmlaq_f32(score1.val[0], gx.val[0], const45); - score1.val[1] = vmlaq_f32(score1.val[1], gx.val[1], const45); - score3.val[0] = vmlsq_f32(score3.val[0], gx.val[0], const45); - score3.val[1] = vmlsq_f32(score3.val[1], gx.val[1], const45); - - score1.val[0] = vabsq_f32(score1.val[0]); - score1.val[1] = vabsq_f32(score1.val[1]); - score3.val[0] = vabsq_f32(score3.val[0]); - score3.val[1] = vabsq_f32(score3.val[1]); - - float32x4x2_t phase = - { - { - zero, - zero - } - }; - - float32x4x2_t old_score = score0; - - // score1 > old_score? - uint32x4x2_t mask = - { - { - vcgtq_f32(score1.val[0], old_score.val[0]), - vcgtq_f32(score1.val[1], old_score.val[1]) - } - }; - - phase.val[0] = vbslq_f32(mask.val[0], one, phase.val[0]); - phase.val[1] = vbslq_f32(mask.val[1], one, phase.val[1]); - old_score.val[0] = vbslq_f32(mask.val[0], score1.val[0], old_score.val[0]); - old_score.val[1] = vbslq_f32(mask.val[1], score1.val[1], old_score.val[1]); - - // score2 > old_score? - mask.val[0] = vcgtq_f32(score2.val[0], old_score.val[0]); - mask.val[1] = vcgtq_f32(score2.val[1], old_score.val[1]); - - phase.val[0] = vbslq_f32(mask.val[0], two, phase.val[0]); - phase.val[1] = vbslq_f32(mask.val[1], two, phase.val[1]); - old_score.val[0] = vbslq_f32(mask.val[0], score2.val[0], old_score.val[0]); - old_score.val[1] = vbslq_f32(mask.val[1], score2.val[1], old_score.val[1]); - - // score3 > old_score? - mask.val[0] = vcgtq_f32(score3.val[0], old_score.val[0]); - mask.val[1] = vcgtq_f32(score3.val[1], old_score.val[1]); - - phase.val[0] = vbslq_f32(mask.val[0], three, phase.val[0]); - phase.val[1] = vbslq_f32(mask.val[1], three, phase.val[1]); - old_score.val[0] = vbslq_f32(mask.val[0], score3.val[0], old_score.val[0]); - old_score.val[1] = vbslq_f32(mask.val[1], score3.val[1], old_score.val[1]); - - // Convert from float32x4_t to uint8x8_t - return vmovn_u16(vcombine_u16(vmovn_u32(vcvtq_u32_f32(phase.val[0])), - vmovn_u32(vcvtq_u32_f32(phase.val[1])))); -} - -/* Computes the gradient phase if gradient_size = 3 or 5. The output is quantized. - * 0 = 0°, 1 = 45°, 2 = 90°, 3 = 135° - * - * @param[in] gx Gx component - * @param[in] gy Gy component - * - * @return quantized phase for 8 pixels - */ -inline uint8x8_t phase_quantization_S16_S16(int16x8_t gx, int16x8_t gy) -{ - // Convert to float - const float32x4x2_t gx_f32 = - { - { - vcvtq_f32_s32(vmovl_s16(vget_low_s16(gx))), - vcvtq_f32_s32(vmovl_s16(vget_high_s16(gx))) - } - }; - - const float32x4x2_t gy_f32 = - { - { - vcvtq_f32_s32(vmovl_s16(vget_low_s16(gy))), - vcvtq_f32_s32(vmovl_s16(vget_high_s16(gy))) - } - }; - - return phase_quantization(gx_f32, gy_f32); -} - -/* Computes the gradient phase if gradient_size = 7. The output is quantized. - * 0 = 0°, 1 = 45°, 2 = 90°, 3 = 135° - * - * @param[in] gx Gx component - * @param[in] gy Gy component - * - * @return quantized phase for 8 pixels - */ -inline uint8x8_t phase_quantization_S32_S32(const int32x4x2_t &gx, const int32x4x2_t &gy) -{ - // Convert to float - const float32x4x2_t gx_f32 = - { - { - vcvtq_f32_s32(gx.val[0]), - vcvtq_f32_s32(gx.val[1]) - } - }; - - const float32x4x2_t gy_f32 = - { - { - vcvtq_f32_s32(gy.val[0]), - vcvtq_f32_s32(gy.val[1]) - } - }; - - return phase_quantization(gx_f32, gy_f32); -} - -/* Computes the magnitude using the L1-norm type if gradient_size = 3 or 5 - * - * @param[in] gx Gx component - * @param[in] gy Gy component - * - * @return magnitude for 8 pixels - */ -inline uint16x8_t mag_l1_S16_S16(int16x8_t gx, int16x8_t gy) -{ - return vaddq_u16(vreinterpretq_u16_s16(vabsq_s16(gx)), - vreinterpretq_u16_s16(vabsq_s16(gy))); -} - -/* Computes the magnitude using the L1-norm type if gradient_size = 7 - * - * @param[in] gx Gx component - * @param[in] gy Gy component - * - * @return magnitude for 8 pixels - */ -inline uint32x4x2_t mag_l1_S32_S32(const int32x4x2_t &gx, const int32x4x2_t &gy) -{ - const uint32x4x2_t gx_abs = - { - { - vreinterpretq_u32_s32(vabsq_s32(gx.val[0])), - vreinterpretq_u32_s32(vabsq_s32(gx.val[1])) - } - }; - - const uint32x4x2_t gy_abs = - { - { - vreinterpretq_u32_s32(vabsq_s32(gy.val[0])), - vreinterpretq_u32_s32(vabsq_s32(gy.val[1])) - } - }; - - const uint32x4x2_t output = - { - { - vaddq_u32(gx_abs.val[0], gy_abs.val[0]), - vaddq_u32(gx_abs.val[1], gy_abs.val[1]) - } - }; - - return output; -} - -inline float32x4x2_t mag_l2(const float32x4x2_t &gx, const float32x4x2_t &gy) -{ - // x^2 ... - float32x4x2_t magnitude = - { - { - vmulq_f32(gx.val[0], gx.val[0]), - vmulq_f32(gx.val[1], gx.val[1]) - } - }; - - // ... + y^2 - magnitude.val[0] = vmlaq_f32(magnitude.val[0], gy.val[0], gy.val[0]); - magnitude.val[1] = vmlaq_f32(magnitude.val[1], gy.val[1], gy.val[1]); - - // sqrt(...) - magnitude.val[0] = vmulq_f32(vrsqrteq_f32(magnitude.val[0]), magnitude.val[0]); - magnitude.val[1] = vmulq_f32(vrsqrteq_f32(magnitude.val[1]), magnitude.val[1]); - - return magnitude; -} - -/* Computes the magnitude using L2-norm if gradient_size = 3 or 5 - * - * @param[in] gx Gx component - * @param[in] gy Gy component - * - * @return magnitude for 8 pixels - */ -inline uint16x8_t mag_l2_S16_S16(int16x8_t gx, int16x8_t gy) -{ - // Compute magnitude using L2 normalization - const float32x4x2_t gx2 = - { - { - vcvtq_f32_s32(vmovl_s16(vget_low_s16(gx))), - vcvtq_f32_s32(vmovl_s16(vget_high_s16(gx))) - } - }; - - const float32x4x2_t gy2 = - { - { - vcvtq_f32_s32(vmovl_s16(vget_low_s16(gy))), - vcvtq_f32_s32(vmovl_s16(vget_high_s16(gy))) - } - }; - - const float32x4x2_t magnitude = mag_l2(gx2, gy2); - - // Store magnitude - Convert to uint16x8 - return vcombine_u16(vmovn_u32(vcvtq_u32_f32(magnitude.val[0])), - vmovn_u32(vcvtq_u32_f32(magnitude.val[1]))); -} - -/* Computes the magnitude using L2-norm if gradient_size = 7 - * - * @param[in] gx Gx component - * @param[in] gy Gy component - * - * @return magnitude for 8 pixels - */ -inline uint32x4x2_t mag_l2_S32_S32(const int32x4x2_t &gx, const int32x4x2_t &gy) -{ - // Compute magnitude using L2 normalization - float32x4x2_t gx2 = - { - { - vcvtq_f32_s32(gx.val[0]), - vcvtq_f32_s32(gx.val[1]) - } - }; - - float32x4x2_t gy2 = - { - { - vcvtq_f32_s32(gy.val[0]), - vcvtq_f32_s32(gy.val[1]) - } - }; - - const float32x4x2_t magnitude = mag_l2(gx2, gy2); - const uint32x4x2_t mag32 = - { - { - vcvtq_u32_f32(magnitude.val[0]), - vcvtq_u32_f32(magnitude.val[1]) - } - }; - - return mag32; -} - -/* Gradient function used when the gradient size = 3 or 5 and when the norm_type = L1-norm - * - * @param[in] gx_ptr Pointer to source image. Gx image. Data type supported S16 - * @param[in] gy_ptr Pointer to source image. Gy image. Data type supported S16 - * @param[out] magnitude_ptr Pointer to destination image. Magnitude. Data type supported U16 - * @param[out] phase_ptr Pointer to destination image. Quantized phase. Data type supported U8 - */ -void mag_phase_l1norm_S16_S16_U16_U8(const void *__restrict gx_ptr, const void *__restrict gy_ptr, void *__restrict magnitude_ptr, void *__restrict phase_ptr) -{ - const auto gx = static_cast(gx_ptr); - const auto gy = static_cast(gy_ptr); - const auto magnitude = static_cast(magnitude_ptr); - const auto phase = static_cast(phase_ptr); - - const int16x8x4_t gx_val = - { - { - vld1q_s16(gx), - vld1q_s16(gx + 8), - vld1q_s16(gx + 16), - vld1q_s16(gx + 24) - } - }; - - const int16x8x4_t gy_val = - { - { - vld1q_s16(gy), - vld1q_s16(gy + 8), - vld1q_s16(gy + 16), - vld1q_s16(gy + 24) - } - }; - - // Compute and store phase - vst1_u8(phase + 0, phase_quantization_S16_S16(gx_val.val[0], gy_val.val[0])); - vst1_u8(phase + 8, phase_quantization_S16_S16(gx_val.val[1], gy_val.val[1])); - vst1_u8(phase + 16, phase_quantization_S16_S16(gx_val.val[2], gy_val.val[2])); - vst1_u8(phase + 24, phase_quantization_S16_S16(gx_val.val[3], gy_val.val[3])); - - // Compute ans store magnitude using L1 normalization - vst1q_u16(magnitude + 0, mag_l1_S16_S16(gx_val.val[0], gy_val.val[0])); - vst1q_u16(magnitude + 8, mag_l1_S16_S16(gx_val.val[1], gy_val.val[1])); - vst1q_u16(magnitude + 16, mag_l1_S16_S16(gx_val.val[2], gy_val.val[2])); - vst1q_u16(magnitude + 24, mag_l1_S16_S16(gx_val.val[3], gy_val.val[3])); -} - -/* Gradient function used when the gradient size = 3 or 5 and when the norm_type = L2-norm - * - * @param[in] gx_ptr Pointer to source image. Gx image. Data type supported S16 - * @param[in] gy_ptr Pointer to source image. Gy image. Data type supported S16 - * @param[out] magnitude_ptr Pointer to destination image. Magnitude. Data type supported U16 - * @param[out] phase_ptr Pointer to destination image. Quantized phase. Data type supported U8 - */ -void mag_phase_l2norm_S16_S16_U16_U8(const void *__restrict gx_ptr, const void *__restrict gy_ptr, void *__restrict magnitude_ptr, void *__restrict phase_ptr) -{ - const auto gx = static_cast(gx_ptr); - const auto gy = static_cast(gy_ptr); - const auto magnitude = static_cast(magnitude_ptr); - const auto phase = static_cast(phase_ptr); - - const int16x8x4_t gx_val = - { - { - vld1q_s16(gx), - vld1q_s16(gx + 8), - vld1q_s16(gx + 16), - vld1q_s16(gx + 24) - } - }; - - const int16x8x4_t gy_val = - { - { - vld1q_s16(gy), - vld1q_s16(gy + 8), - vld1q_s16(gy + 16), - vld1q_s16(gy + 24) - } - }; - - // Compute and store phase - vst1_u8(phase + 0, phase_quantization_S16_S16(gx_val.val[0], gy_val.val[0])); - vst1_u8(phase + 8, phase_quantization_S16_S16(gx_val.val[1], gy_val.val[1])); - vst1_u8(phase + 16, phase_quantization_S16_S16(gx_val.val[2], gy_val.val[2])); - vst1_u8(phase + 24, phase_quantization_S16_S16(gx_val.val[3], gy_val.val[3])); - - // Compute and store magnitude using L2 normalization - vst1q_u16(magnitude + 0, mag_l2_S16_S16(gx_val.val[0], gy_val.val[0])); - vst1q_u16(magnitude + 8, mag_l2_S16_S16(gx_val.val[1], gy_val.val[1])); - vst1q_u16(magnitude + 16, mag_l2_S16_S16(gx_val.val[2], gy_val.val[2])); - vst1q_u16(magnitude + 24, mag_l2_S16_S16(gx_val.val[3], gy_val.val[3])); -} - -/* Gradient function used when the gradient size = 7 and when the norm_type = L1-norm - * - * @param[in] gx_ptr Pointer to source image. Gx image. Data type supported S32 - * @param[in] gy_ptr Pointer to source image. Gy image. Data type supported S32 - * @param[out] magnitude_ptr Pointer to destination image. Magnitude. Data type supported U32 - * @param[out] phase_ptr Pointer to destination image. Quantized phase. Data type support U8 - */ -void mag_phase_l1norm_S32_S32_U32_U8(const void *__restrict gx_ptr, const void *__restrict gy_ptr, void *__restrict magnitude_ptr, void *__restrict phase_ptr) -{ - auto gx = static_cast(gx_ptr); - auto gy = static_cast(gy_ptr); - auto magnitude = static_cast(magnitude_ptr); - auto phase = static_cast(phase_ptr); - - // Process low and high part - for(size_t i = 0; i < 2; ++i, gx += 16, gy += 16, magnitude += 16, phase += 16) - { - const int32x4x2_t gx0 = - { - { - vld1q_s32(gx + 0), - vld1q_s32(gx + 4) - } - }; - - const int32x4x2_t gx1 = - { - { - vld1q_s32(gx + 8), - vld1q_s32(gx + 12) - } - }; - - const int32x4x2_t gy0 = - { - { - vld1q_s32(gy + 0), - vld1q_s32(gy + 4) - } - }; - - const int32x4x2_t gy1 = - { - { - vld1q_s32(gy + 8), - vld1q_s32(gy + 12) - } - }; - - // Compute and store phase - vst1_u8(phase + 0, phase_quantization_S32_S32(gx0, gy0)); - vst1_u8(phase + 8, phase_quantization_S32_S32(gx1, gy1)); - - // Compute magnitude using L1 normalization - const uint32x4x2_t mag0 = mag_l1_S32_S32(gx0, gy0); - const uint32x4x2_t mag1 = mag_l1_S32_S32(gx1, gy1); - - // Store magnitude - vst1q_u32(magnitude + 0, mag0.val[0]); - vst1q_u32(magnitude + 4, mag0.val[1]); - vst1q_u32(magnitude + 8, mag1.val[0]); - vst1q_u32(magnitude + 12, mag1.val[1]); - } -} - -/* Gradient function used when the gradient size = 7 and when the norm_type = L2-norm - * - * @param[in] gx_ptr Pointer to source image. Gx image. Data type supported S32 - * @param[in] gy_ptr Pointer to source image. Gy image. Data type supported S32 - * @param[out] magnitude_ptr Pointer to destination image. Magnitude. Data type supported U32 - * @param[out] phase_ptr Pointer to destination image. Quantized phase. Data type supported U8 - */ -void mag_phase_l2norm_S32_S32_U32_U8(const void *__restrict gx_ptr, const void *__restrict gy_ptr, void *__restrict magnitude_ptr, void *__restrict phase_ptr) -{ - auto gx = static_cast(gx_ptr); - auto gy = static_cast(gy_ptr); - auto magnitude = static_cast(magnitude_ptr); - auto phase = static_cast(phase_ptr); - - // Process low and high part - for(size_t i = 0; i < 2; ++i, gx += 16, gy += 16, magnitude += 16, phase += 16) - { - const int32x4x2_t gx0 = - { - { - vld1q_s32(gx + 0), - vld1q_s32(gx + 4) - } - }; - - const int32x4x2_t gx1 = - { - { - vld1q_s32(gx + 8), - vld1q_s32(gx + 12) - } - }; - - const int32x4x2_t gy0 = - { - { - vld1q_s32(gy + 0), - vld1q_s32(gy + 4) - } - }; - - const int32x4x2_t gy1 = - { - { - vld1q_s32(gy + 8), - vld1q_s32(gy + 12) - } - }; - - // Compute and store phase - vst1_u8(phase + 0, phase_quantization_S32_S32(gx0, gy0)); - vst1_u8(phase + 8, phase_quantization_S32_S32(gx1, gy1)); - - // Compute magnitude using L2 normalization - const uint32x4x2_t mag0 = mag_l2_S32_S32(gx0, gy0); - const uint32x4x2_t mag1 = mag_l2_S32_S32(gx1, gy1); - - // Store magnitude - vst1q_u32(magnitude + 0, mag0.val[0]); - vst1q_u32(magnitude + 4, mag0.val[1]); - vst1q_u32(magnitude + 8, mag1.val[0]); - vst1q_u32(magnitude + 12, mag1.val[1]); - } -} - -/* Computes non-maxima suppression and hysteresis when the gradient size = 3 or 5 - * - * @param[in] magnitude_ptr Pointer to source image. Magnitude. Data type supported U16 - * @param[in] phase_ptr Pointer to source image. Quantized phase. Data type supported U8 - * @param[out] output_ptr Pointer to output image. Data type supported U8 - * @param[in] stride_mag Stride of magnitude image - * @param[in] lower_thr Lower threshold used for the hysteresis - * @param[in] upper_thr Upper threshold used for the hysteresis - */ -void non_max_suppression_U16_U8_U8(const void *__restrict magnitude_ptr, const void *__restrict phase_ptr, void *__restrict output_ptr, const uint32_t stride_mag, const int32_t lower_thr, - const int32_t upper_thr) -{ - const auto magnitude = static_cast(magnitude_ptr); - const auto phase = static_cast(phase_ptr); - const auto output = static_cast(output_ptr); - - // Get magnitude and phase of the centre pixels - uint16x8_t mc = vld1q_u16(magnitude); - - // Angle_quantized: 0 = 0°, 1 = 45°, 2 = 90°, 3 = 135° - const uint16x8_t pc16 = vmovl_u8(vld1_u8(phase)); - - // 0 degree - const uint16x8_t mk0_0 = vld1q_u16(magnitude - 1); - const uint16x8_t mk0_1 = vld1q_u16(magnitude + 1); - uint16x8_t mask0 = vceqq_u16(pc16, vdupq_n_u16(0)); - mask0 = vandq_u16(mask0, vcgtq_u16(mc, mk0_0)); - mask0 = vandq_u16(mask0, vcgtq_u16(mc, mk0_1)); - - // 45 degree - const uint16x8_t mk45_0 = vld1q_u16(magnitude - stride_mag - 1); - const uint16x8_t mk45_1 = vld1q_u16(magnitude + stride_mag + 1); - uint16x8_t mask1 = vceqq_u16(pc16, vdupq_n_u16(1)); - mask1 = vandq_u16(mask1, vcgtq_u16(mc, mk45_0)); - mask1 = vandq_u16(mask1, vcgtq_u16(mc, mk45_1)); - - // 90 degree - const uint16x8_t mk90_0 = vld1q_u16(magnitude - stride_mag); - const uint16x8_t mk90_1 = vld1q_u16(magnitude + stride_mag); - uint16x8_t mask2 = vceqq_u16(pc16, vdupq_n_u16(2)); - mask2 = vandq_u16(mask2, vcgtq_u16(mc, mk90_0)); - mask2 = vandq_u16(mask2, vcgtq_u16(mc, mk90_1)); - - // 135 degree - const uint16x8_t mk135_0 = vld1q_u16(magnitude - stride_mag + 1); - const uint16x8_t mk135_1 = vld1q_u16(magnitude + stride_mag - 1); - uint16x8_t mask3 = vceqq_u16(pc16, vdupq_n_u16(3)); - mask3 = vandq_u16(mask3, vcgtq_u16(mc, mk135_0)); - mask3 = vandq_u16(mask3, vcgtq_u16(mc, mk135_1)); - - // Merge masks - mask0 = vorrq_u16(mask0, mask1); - mask2 = vorrq_u16(mask2, mask3); - mask0 = vorrq_u16(mask0, mask2); - - mc = vbslq_u16(mask0, mc, vdupq_n_u16(0)); - - // mc > upper_thr - mask0 = vcgtq_u16(mc, vdupq_n_u16(upper_thr)); - - // mc <= lower_thr - mask1 = vcleq_u16(mc, vdupq_n_u16(lower_thr)); - - // mc <= upper_thr && mc > lower_thr - mask2 = vcleq_u16(mc, vdupq_n_u16(upper_thr)); - mask2 = vandq_u16(mask2, vcgtq_u16(mc, vdupq_n_u16(lower_thr))); - - mc = vbslq_u16(mask0, vdupq_n_u16(EDGE), mc); - mc = vbslq_u16(mask1, vdupq_n_u16(NO_EDGE), mc); - mc = vbslq_u16(mask2, vdupq_n_u16(MAYBE), mc); - - vst1_u8(output, vmovn_u16(mc)); -} - -inline uint16x4_t non_max_U32_helper(const uint32_t *input, const uint16x4_t pc, const uint32_t stride_mag, const int32_t lower_thr, const int32_t upper_thr) -{ - // Phase for 4 pixel - const uint32x4_t pc32 = vmovl_u16(pc); - - // Get magnitude for 4 pixel - uint32x4_t mc = vld1q_u32(input); - - // Angle_quantized: 0 = 0°, 1 = 45°, 2 = 90°, 3 = 135° - // 0 degree - const uint32x4_t mk0_0 = vld1q_u32(input - 1); - const uint32x4_t mk0_1 = vld1q_u32(input + 1); - uint32x4_t mask0 = vceqq_u32(pc32, vdupq_n_u32(0)); - mask0 = vandq_u32(mask0, vcgtq_u32(mc, mk0_0)); - mask0 = vandq_u32(mask0, vcgtq_u32(mc, mk0_1)); - - // 45 degree - const uint32x4_t mk45_0 = vld1q_u32(input - stride_mag - 1); - const uint32x4_t mk45_1 = vld1q_u32(input + stride_mag + 1); - uint32x4_t mask1 = vceqq_u32(pc32, vdupq_n_u32(1)); - mask1 = vandq_u32(mask1, vcgtq_u32(mc, mk45_0)); - mask1 = vandq_u32(mask1, vcgtq_u32(mc, mk45_1)); - - // 90 degree - const uint32x4_t mk90_0 = vld1q_u32(input - stride_mag); - const uint32x4_t mk90_1 = vld1q_u32(input + stride_mag); - uint32x4_t mask2 = vceqq_u32(pc32, vdupq_n_u32(2)); - mask2 = vandq_u32(mask2, vcgtq_u32(mc, mk90_0)); - mask2 = vandq_u32(mask2, vcgtq_u32(mc, mk90_1)); - - // 135 degree - const uint32x4_t mk135_0 = vld1q_u32(input - stride_mag + 1); - const uint32x4_t mk135_1 = vld1q_u32(input + stride_mag - 1); - uint32x4_t mask3 = vceqq_u32(pc32, vdupq_n_u32(3)); - mask3 = vandq_u32(mask3, vcgtq_u32(mc, mk135_0)); - mask3 = vandq_u32(mask3, vcgtq_u32(mc, mk135_1)); - - // Merge masks - mask0 = vorrq_u32(mask0, mask1); - mask2 = vorrq_u32(mask2, mask3); - mask0 = vorrq_u32(mask0, mask2); - - mc = vbslq_u32(mask0, mc, vdupq_n_u32(0)); - - // mc > upper_thr - mask0 = vcgtq_u32(mc, vdupq_n_u32(upper_thr)); - - // mc <= lower_thr - mask1 = vcleq_u32(mc, vdupq_n_u32(lower_thr)); - - // mc <= upper_thr && mc > lower_thr - mask2 = vcleq_u32(mc, vdupq_n_u32(upper_thr)); - mask2 = vandq_u32(mask2, vcgtq_u32(mc, vdupq_n_u32(lower_thr))); - - mc = vbslq_u32(mask0, vdupq_n_u32(EDGE), mc); - mc = vbslq_u32(mask1, vdupq_n_u32(NO_EDGE), mc); - mc = vbslq_u32(mask2, vdupq_n_u32(MAYBE), mc); - - return vmovn_u32(mc); -} - -/* Computes non-maxima suppression and hysteresis when the gradient_size = 7 - * - * @param[in] magnitude_ptr Pointer to source image. Magnitude. Data type supported U32 - * @param[in] phase_ptr Pointer to source image. Quantized phase. Data type supported U8 - * @param[out] output_ptr Pointer to destination image. Data type supported U8 - * @param[in] stride_mag Stride of magnitude image - * @param[in] lower_thr Lower threshold used for the hysteresis - * @param[in] upper_thr Upper threshold used for the hysteresis - */ -void non_max_suppression_U32_U8_U8(const void *__restrict magnitude_ptr, const void *__restrict phase_ptr, void *__restrict output_ptr, const uint32_t stride_mag, const int32_t lower_thr, - const int32_t upper_thr) -{ - const auto magnitude = static_cast(magnitude_ptr); - const auto phase = static_cast(phase_ptr); - const auto output = static_cast(output_ptr); - - // Get phase for 8 pixel - const uint16x8_t pc16 = vmovl_u8(vld1_u8(phase)); - - // Compute non maxima suppression - const uint16x4x2_t res = - { - { - non_max_U32_helper(magnitude, vget_low_u16(pc16), stride_mag, lower_thr, upper_thr), - non_max_U32_helper(magnitude + 4, vget_high_u16(pc16), stride_mag, lower_thr, upper_thr) - } - }; - - // Store result - vst1_u8(output, vmovn_u16(vcombine_u16(res.val[0], res.val[1]))); -} - -/* Computes edge tracing when is called by edge_trace_U8_U8 recursively - * - * @param[in] input Pointer to source image. Data type supported U8 - * @param[out] output Pointer to destination image. Data type supported U8 - * @param[in] input_stride Stride of the input image - * @param[in] output_stride Stride of the output image - */ -void edge_trace_recursive_U8_U8(uint8_t *__restrict input, uint8_t *__restrict output, const int32_t input_stride, const int32_t output_stride) -{ - // Look for MAYBE pixels in 8 directions - *output = EDGE; - - // (-1, 0) - uint8_t pixel = *(input - 1); - - if(pixel == MAYBE) - { - // Touched a MAYBE point. MAYBE becomes EDGE - *(input - 1) = EDGE; - - edge_trace_recursive_U8_U8(input - 1, output - 1, input_stride, output_stride); - } - - // (+1, 0) - pixel = *(input + 1); - - if(pixel == MAYBE) - { - // Touched a MAYBE point. MAYBE becomes EDGE - *(input + 1) = EDGE; - - edge_trace_recursive_U8_U8(input + 1, output + 1, input_stride, output_stride); - } - - input -= input_stride; - output -= output_stride; - - // (-1, -1) - pixel = *(input - 1); - - if(pixel == MAYBE) - { - // Touched a MAYBE point. MAYBE becomes EDGE - *(input - 1) = EDGE; - - edge_trace_recursive_U8_U8(input - 1, output - 1, input_stride, output_stride); - } - - // (0, -1) - pixel = *input; - - if(pixel == MAYBE) - { - // Touched a MAYBE point. MAYBE becomes EDGE - *input = EDGE; - - edge_trace_recursive_U8_U8(input, output, input_stride, output_stride); - } - - // (+1, -1) - pixel = *(input + 1); - - if(pixel == MAYBE) - { - // Touched a MAYBE point. MAYBE becomes EDGE - *(input + 1) = EDGE; - - edge_trace_recursive_U8_U8(input + 1, output + 1, input_stride, output_stride); - } - - input += input_stride * 2; - output += output_stride * 2; - - // (-1, +1) - pixel = *(input - 1); - - if(pixel == MAYBE) - { - // Touched a MAYBE point. MAYBE becomes EDGE - *(input - 1) = EDGE; - - edge_trace_recursive_U8_U8(input - 1, output - 1, input_stride, output_stride); - } - - // (0, +1) - pixel = *input; - - if(pixel == MAYBE) - { - // Touched a MAYBE point. MAYBE becomes EDGE - *input = EDGE; - - edge_trace_recursive_U8_U8(input, output, input_stride, output_stride); - } - - // (+1, +1) - pixel = *(input + 1); - - if(pixel == MAYBE) - { - // Touched a MAYBE point. MAYBE becomes EDGE - *(input + 1) = EDGE; - - edge_trace_recursive_U8_U8(input + 1, output + 1, input_stride, output_stride); - } -} - -/* Computes edge tracing - * - * @param[in] input Pointer to source image. Data type supported U8 - * @param[out] output Pointer to destination image. Data type supported U8 - * @param[in] input_stride Stride of the input image - * @param[in] output_stride Stride of the output image - */ -void edge_trace_U8_U8(uint8_t *__restrict input, uint8_t *__restrict output, const int32_t input_stride, const int32_t output_stride) -{ - if(*input == NO_EDGE) - { - *output = NO_EDGE; - } - // Check if EDGE and not yet touched - else if((*input == EDGE) && (*output == NO_EDGE)) - { - edge_trace_recursive_U8_U8(input, output, input_stride, output_stride); - } -} -} // namespace - -NEGradientKernel::~NEGradientKernel() = default; - -NEGradientKernel::NEGradientKernel() - : _func(nullptr), _gx(nullptr), _gy(nullptr), _magnitude(nullptr), _phase(nullptr) -{ -} - -void NEGradientKernel::configure(const ITensor *gx, const ITensor *gy, ITensor *magnitude, ITensor *phase, int32_t norm_type) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(gx, gy, magnitude, phase); - - set_shape_if_empty(*magnitude->info(), gx->info()->tensor_shape()); - set_shape_if_empty(*phase->info(), gx->info()->tensor_shape()); - - Format magnitude_format = gx->info()->data_type() == DataType::S16 ? Format::U16 : Format::U32; - set_format_if_unknown(*magnitude->info(), magnitude_format); - set_format_if_unknown(*phase->info(), Format::U8); - - ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(gx, gy, magnitude, phase); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(gx, 1, DataType::S16, DataType::S32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(gy, 1, DataType::S16, DataType::S32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(magnitude, 1, DataType::U16, DataType::U32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(phase, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(gx, gy); - ARM_COMPUTE_ERROR_ON_MSG(element_size_from_data_type(gx->info()->data_type()) != element_size_from_data_type(magnitude->info()->data_type()), "Magnitude must have the same element size as Gx and Gy"); - - _gx = gx; - _gy = gy; - _magnitude = magnitude; - _phase = phase; - - if(_gx->info()->data_type() == DataType::S16) - { - if(norm_type == 1) - { - _func = &mag_phase_l1norm_S16_S16_U16_U8; - } - else - { - _func = &mag_phase_l2norm_S16_S16_U16_U8; - } - } - else - { - if(norm_type == 1) - { - _func = &mag_phase_l1norm_S32_S32_U32_U8; - } - else - { - _func = &mag_phase_l2norm_S32_S32_U32_U8; - } - } - - constexpr unsigned int num_elems_processed_per_iteration = 32; - - // Configure kernel window - Window win = calculate_max_window(*_gx->info(), Steps(num_elems_processed_per_iteration)); - - AccessWindowHorizontal gx_access(_gx->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal gy_access(_gy->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal mag_access(_magnitude->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal phase_access(_phase->info(), 0, num_elems_processed_per_iteration); - - ARM_COMPUTE_UNUSED(update_window_and_padding(win, gx_access, gy_access, mag_access, phase_access)); - - mag_access.set_valid_region(win, _gx->info()->valid_region()); - phase_access.set_valid_region(win, _gx->info()->valid_region()); - - INEKernel::configure(win); -} - -void NEGradientKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - ARM_COMPUTE_ERROR_ON(_func == nullptr); - Iterator gx(_gx, window); - Iterator gy(_gy, window); - Iterator magnitude(_magnitude, window); - Iterator phase(_phase, window); - - execute_window_loop(window, [&](const Coordinates &) - { - (*_func)(gx.ptr(), gy.ptr(), magnitude.ptr(), phase.ptr()); - }, - gx, gy, magnitude, phase); -} - -NEEdgeNonMaxSuppressionKernel::~NEEdgeNonMaxSuppressionKernel() = default; -NEEdgeNonMaxSuppressionKernel::NEEdgeNonMaxSuppressionKernel() - : _func(nullptr), _magnitude(nullptr), _phase(nullptr), _output(nullptr), _lower_thr(0), _upper_thr(0) -{ -} - -BorderSize NEEdgeNonMaxSuppressionKernel::border_size() const -{ - return BorderSize(1); -} - -void NEEdgeNonMaxSuppressionKernel::configure(const ITensor *magnitude, const ITensor *phase, ITensor *output, - int32_t upper_thr, int32_t lower_thr, bool border_undefined) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(magnitude, phase, output); - - set_shape_if_empty(*output->info(), magnitude->info()->tensor_shape()); - - set_format_if_unknown(*phase->info(), Format::U8); - set_format_if_unknown(*output->info(), Format::U8); - - ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(magnitude, phase, output); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(magnitude, 1, DataType::U16, DataType::U32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(phase, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(phase, output); - - _magnitude = magnitude; - _phase = phase; - _output = output; - - switch(_magnitude->info()->data_type()) - { - case DataType::U16: - _func = &non_max_suppression_U16_U8_U8; - break; - case DataType::U32: - _func = &non_max_suppression_U32_U8_U8; - break; - default: - ARM_COMPUTE_ERROR("Unsupported data type!"); - } - - // Set thresholds - _lower_thr = lower_thr; - _upper_thr = upper_thr; - - constexpr unsigned int num_elems_processed_per_iteration = 8; - constexpr unsigned int num_elems_read_per_iteration = 10; - constexpr unsigned int num_rows_read_per_iteration = 3; - - // Configure kernel window - Window win = calculate_max_window(*_magnitude->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size()); - - AccessWindowRectangle mag_access(_magnitude->info(), -border_size().left, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration); - AccessWindowHorizontal phase_access(_phase->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal output_access(_output->info(), 0, num_elems_processed_per_iteration); - - update_window_and_padding(win, mag_access, phase_access, output_access); - - output_access.set_valid_region(win, _magnitude->info()->valid_region(), border_undefined, border_size()); - - INEKernel::configure(win); -} - -void NEEdgeNonMaxSuppressionKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - ARM_COMPUTE_ERROR_ON(_func == nullptr); - Iterator magnitude(_magnitude, window); - Iterator phase(_phase, window); - Iterator output(_output, window); - - const size_t input1_stride = _magnitude->info()->strides_in_bytes()[1]; - const size_t input1_stride_ushort = input1_stride / data_size_from_type(_magnitude->info()->data_type()); - - execute_window_loop(window, [&](const Coordinates &) - { - (*_func)(magnitude.ptr(), phase.ptr(), output.ptr(), input1_stride_ushort, _lower_thr, _upper_thr); - }, - magnitude, phase, output); -} - -NEEdgeTraceKernel::~NEEdgeTraceKernel() = default; -NEEdgeTraceKernel::NEEdgeTraceKernel() - : _input(nullptr), _output(nullptr) -{ -} - -BorderSize NEEdgeTraceKernel::border_size() const -{ - return BorderSize(1); -} - -bool NEEdgeTraceKernel::is_parallelisable() const -{ - return false; -} - -void NEEdgeTraceKernel::configure(ITensor *input, ITensor *output) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - - set_shape_if_empty(*output->info(), input->info()->tensor_shape()); - - set_format_if_unknown(*input->info(), Format::U8); - set_format_if_unknown(*output->info(), Format::U8); - - ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - - _input = input; - _output = output; - - constexpr unsigned int num_elems_processed_per_iteration = 1; - - // Configure kernel window - Window win = calculate_max_window(*_input->info(), Steps(num_elems_processed_per_iteration)); - - const ValidRegion &input_valid_region = input->info()->valid_region(); - const ValidRegion &output_valid_region = output->info()->valid_region(); - - // Reads can occur within the valid region of the input + border - AccessWindowStatic input_access(input->info(), - input_valid_region.anchor[0] - border_size().left, - input_valid_region.anchor[1] - border_size().top, - input_valid_region.anchor[0] + input_valid_region.shape[0] + border_size().right, - input_valid_region.anchor[1] + input_valid_region.shape[1] + border_size().bottom); - - // Writes can occur within the valid region of the output + border - AccessWindowStatic output_access(output->info(), - output_valid_region.anchor[0] - border_size().left, - output_valid_region.anchor[1] - border_size().top, - output_valid_region.anchor[0] + output_valid_region.shape[0] + border_size().right, - output_valid_region.anchor[1] + output_valid_region.shape[1] + border_size().bottom); - - update_window_and_padding(win, input_access, output_access); - - output_access.set_valid_region(win, _input->info()->valid_region()); - - INEKernel::configure(win); -} - -void NEEdgeTraceKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - Iterator input(_input, window); - Iterator output(_output, window); - - const size_t input_stride = _input->info()->strides_in_bytes()[1]; - const size_t output_stride = _output->info()->strides_in_bytes()[1]; - - execute_window_loop(window, [&](const Coordinates &) - { - edge_trace_U8_U8(input.ptr(), output.ptr(), input_stride, output_stride); - }, - input, output); -} -} // namespace arm_compute \ No newline at end of file diff --git a/src/core/NEON/kernels/NECannyEdgeKernel.h b/src/core/NEON/kernels/NECannyEdgeKernel.h deleted file mode 100644 index f1d24410f7..0000000000 --- a/src/core/NEON/kernels/NECannyEdgeKernel.h +++ /dev/null @@ -1,189 +0,0 @@ -/* - * Copyright (c) 2016-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NECANNYEDGEKERNEL_H -#define ARM_COMPUTE_NECANNYEDGEKERNEL_H - -#include "src/core/NEON/INEKernel.h" - -#include - -namespace arm_compute -{ -class ITensor; - -/** Computes magnitude and quantised phase from inputs gradients. */ -class NEGradientKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEGradientKernel"; - } - /** Default constructor */ - NEGradientKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGradientKernel(const NEGradientKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGradientKernel &operator=(const NEGradientKernel &) = delete; - /** Allow instances of this class to be moved */ - NEGradientKernel(NEGradientKernel &&) = default; - /** Allow instances of this class to be moved */ - NEGradientKernel &operator=(NEGradientKernel &&) = default; - /** Default destructor */ - ~NEGradientKernel(); - - /** Initialise the kernel's sources, destinations and border mode. - * - * @note gx, gy and magnitude must all be the same size (either 16 or 32) - * - * @param[in] gx Source tensor - Gx component. Data type supported: S16/S32. - * @param[in] gy Source tensor - Gy component. Data type supported: same as @p gx. - * @param[out] magnitude Destination tensor - Magnitude. Data type supported: U16 (if the data type of @p gx is S16) / U32 (if the data type of @p gx is S32). - * @param[out] phase Destination tensor - Quantized phase. Data type supported: U8. - * @param[in] norm_type Normalization type. If 1, L1-Norm otherwise L2-Norm - */ - virtual void configure(const ITensor *gx, const ITensor *gy, ITensor *magnitude, ITensor *phase, int32_t norm_type); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -protected: - /** Common signature for all the specialised gradient functions - * - * @param[in] gx_ptr Pointer to the first input tensor. - * @param[in] gy_ptr Pointer to the second input tensor. - * @param[out] magnitude_ptr Pointer to the first output tensor - * @param[out] phase_ptr Pointer to the second output tensor - */ - using GradientFunction = void(const void *__restrict gx_ptr, const void *__restrict gy_ptr, void *__restrict magnitude_ptr, void *__restrict phase_ptr); - - GradientFunction *_func; /**< Gradient function to use for the particular tensor types passed to configure() */ - const ITensor *_gx; /**< Source tensor - Gx component */ - const ITensor *_gy; /**< Source tensor - Gy component */ - ITensor *_magnitude; /**< Destination tensor - Magnitude */ - ITensor *_phase; /**< Destination tensor - Quantized phase */ -}; - -/** Neon kernel to perform Non-Maxima suppression for Canny Edge. - * - * @note This kernel is meant to be used alongside CannyEdge and performs a non-maxima suppression using magnitude and phase of input - * to characterize points as possible edges. Thus, at the end, each point will be set to EDGE, NO_EDGE or MAYBE. - * - * @note Hysteresis is computed in @ref NEEdgeTraceKernel - */ -class NEEdgeNonMaxSuppressionKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEEdgeNonMaxSuppressionKernel"; - } - /** Default constructor */ - NEEdgeNonMaxSuppressionKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEEdgeNonMaxSuppressionKernel(const NEEdgeNonMaxSuppressionKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEEdgeNonMaxSuppressionKernel &operator=(const NEEdgeNonMaxSuppressionKernel &) = delete; - /** Allow instances of this class to be moved */ - NEEdgeNonMaxSuppressionKernel(NEEdgeNonMaxSuppressionKernel &&) = default; - /** Allow instances of this class to be moved */ - NEEdgeNonMaxSuppressionKernel &operator=(NEEdgeNonMaxSuppressionKernel &&) = default; - /** Default destructor */ - ~NEEdgeNonMaxSuppressionKernel(); - - /** Initialise the kernel's sources, destination and border mode. - * - * @param[in] magnitude Source tensor - Magnitude. Data type supported: U16/U32. - * @param[in] phase Source tensor - Quantized phase. Data type supported: U8. - * @param[out] output Output tensor. Data type supported: U8. It will be filled with 0 for "no edge", 127 for "maybe", 255 for "edge" - * @param[in] upper_thr Upper threshold used for the hysteresis - * @param[in] lower_thr Lower threshold used for the hysteresis - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *magnitude, const ITensor *phase, ITensor *output, int32_t upper_thr, int32_t lower_thr, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - /** Common signature for all the specialised non-maxima suppression functions - * - * @param[in] magnitude_ptr Pointer to the first input tensor. - * @param[in] phase_ptr Pointer to the second input tensor. - * @param[out] output_ptr Pointer to the output tensor - * @param[in] stride_mag Stride of the magnitude tensor - * @param[in] upper_thr Upper threshold used for the hysteresis - * @param[in] lower_thr Lower threshold used for the hysteresis - */ - using EdgeNonMaxSupprFunction = void(const void *__restrict magnitude_ptr, const void *__restrict phase_ptr, void *__restrict output_ptr, const uint32_t stride_mag, const int32_t upper_thr, - const int32_t lower_thr); - - EdgeNonMaxSupprFunction *_func; /**< Non-Maxima suppression function to use for the particular tensor types passed to configure() */ - const ITensor *_magnitude; /**< Source tensor - Magnitude */ - const ITensor *_phase; /**< Source tensor - Quantized phase */ - ITensor *_output; /**< Destination tensor */ - int32_t _lower_thr; /**< Lower threshold used for the hysteresis */ - int32_t _upper_thr; /**< Upper threshold used for the hysteresis */ -}; - -/** Neon kernel to perform Edge tracing */ -class NEEdgeTraceKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEEdgeTraceKernel"; - } - /** Default constructor */ - NEEdgeTraceKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEEdgeTraceKernel(const NEEdgeTraceKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEEdgeTraceKernel &operator=(const NEEdgeTraceKernel &) = delete; - /** Allow instances of this class to be moved */ - NEEdgeTraceKernel(NEEdgeTraceKernel &&) = default; - /** Allow instances of this class to be moved */ - NEEdgeTraceKernel &operator=(NEEdgeTraceKernel &&) = default; - /** Default destructor */ - ~NEEdgeTraceKernel(); - - /** Initialise the kernel's source, destination and border mode. - * - * @param[in,out] input Source tensor. Data type supported: U8. Must contain 0 for "no edge", 127 for "maybe", 255 for "edge" - * @param[in,out] output Destination tensor. Data type supported: U8. Must be initialized to 0 (No edge). - */ - void configure(ITensor *input, ITensor *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - bool is_parallelisable() const override; - -private: - ITensor *_input; /**< Source tensor */ - ITensor *_output; /**< Destination tensor */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NECANNYEDGEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEChannelCombineKernel.cpp b/src/core/NEON/kernels/NEChannelCombineKernel.cpp deleted file mode 100644 index 6bfd4c5bda..0000000000 --- a/src/core/NEON/kernels/NEChannelCombineKernel.cpp +++ /dev/null @@ -1,456 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/NEON/kernels/NEChannelCombineKernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/IAccessWindow.h" -#include "arm_compute/core/IMultiImage.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/MultiImageInfo.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" - -#include - -using namespace arm_compute; - -namespace arm_compute -{ -class Coordinates; -} // namespace arm_compute - -NEChannelCombineKernel::NEChannelCombineKernel() - : _func(nullptr), _planes{ { nullptr } }, _output(nullptr), _output_multi(nullptr), _x_subsampling{ { 1, 1, 1 } }, _y_subsampling{ { 1, 1, 1 } }, _num_elems_processed_per_iteration(8), -_is_parallelizable(true) -{ -} - -void NEChannelCombineKernel::configure(const ITensor *plane0, const ITensor *plane1, const ITensor *plane2, const ITensor *plane3, ITensor *output) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(plane0, plane1, plane2, output); - ARM_COMPUTE_ERROR_ON(plane0 == output); - ARM_COMPUTE_ERROR_ON(plane1 == output); - ARM_COMPUTE_ERROR_ON(plane2 == output); - - ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(plane0, Format::U8); - ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(plane1, Format::U8); - ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(plane2, Format::U8); - ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(output, Format::RGB888, Format::RGBA8888, Format::UYVY422, Format::YUYV422); - - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(plane0, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(plane1, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(plane2, 1, DataType::U8); - - const Format output_format = output->info()->format(); - - // Check if horizontal dimension of Y plane is even and validate horizontal sub-sampling dimensions for U and V planes - if(Format::YUYV422 == output_format || Format::UYVY422 == output_format) - { - // Validate Y plane of input and output - ARM_COMPUTE_ERROR_ON_TENSORS_NOT_EVEN(output_format, plane0, output); - - // Validate U and V plane of the input - ARM_COMPUTE_ERROR_ON_TENSORS_NOT_SUBSAMPLED(output_format, plane0->info()->tensor_shape(), plane1, plane2); - } - - _planes[0] = plane0; - _planes[1] = plane1; - _planes[2] = plane2; - _planes[3] = nullptr; - - // Validate the last input tensor only for RGBA format - if(Format::RGBA8888 == output_format) - { - ARM_COMPUTE_ERROR_ON_NULLPTR(plane3); - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(plane3); - - ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(plane3, Format::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(plane3, 1, DataType::U8); - - _planes[3] = plane3; - } - - _output = output; - _output_multi = nullptr; - - // Half the processed elements for U and V channels due to horizontal sub-sampling of 2 - if(Format::YUYV422 == output_format || Format::UYVY422 == output_format) - { - _x_subsampling[1] = 2; - _x_subsampling[2] = 2; - } - - _num_elems_processed_per_iteration = 8; - _is_parallelizable = true; - - // Select function and number of elements to process given the output format - switch(output_format) - { - case Format::RGB888: - _func = &NEChannelCombineKernel::combine_3C; - break; - case Format::RGBA8888: - _func = &NEChannelCombineKernel::combine_4C; - break; - case Format::UYVY422: - _num_elems_processed_per_iteration = 16; - _func = &NEChannelCombineKernel::combine_YUV_1p; - break; - case Format::YUYV422: - _num_elems_processed_per_iteration = 16; - _func = &NEChannelCombineKernel::combine_YUV_1p; - break; - default: - ARM_COMPUTE_ERROR("Not supported format."); - break; - } - - Window win = calculate_max_window(*plane0->info(), Steps(_num_elems_processed_per_iteration)); - - AccessWindowHorizontal output_access(output->info(), 0, _num_elems_processed_per_iteration); - AccessWindowHorizontal plane0_access(plane0->info(), 0, _num_elems_processed_per_iteration / _x_subsampling[1], 1.f / _x_subsampling[0]); - AccessWindowHorizontal plane1_access(plane1->info(), 0, _num_elems_processed_per_iteration / _x_subsampling[1], 1.f / _x_subsampling[1]); - AccessWindowHorizontal plane2_access(plane2->info(), 0, _num_elems_processed_per_iteration / _x_subsampling[1], 1.f / _x_subsampling[2]); - AccessWindowHorizontal plane3_access(plane3 == nullptr ? nullptr : plane3->info(), 0, _num_elems_processed_per_iteration); - - update_window_and_padding( - win, - plane0_access, - plane1_access, - plane2_access, - plane3_access, - output_access); - - ValidRegion valid_region = intersect_valid_regions(plane0->info()->valid_region(), - plane1->info()->valid_region(), - plane2->info()->valid_region()); - - if(plane3 != nullptr) - { - valid_region = intersect_valid_regions(plane3->info()->valid_region(), valid_region); - } - - output_access.set_valid_region(win, ValidRegion(valid_region.anchor, output->info()->tensor_shape())); - - INEKernel::configure(win); -} - -void NEChannelCombineKernel::configure(const IImage *plane0, const IImage *plane1, const IImage *plane2, IMultiImage *output) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(plane0, plane1, plane2, output); - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(plane0); - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(plane1); - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(plane2); - - ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(plane0, Format::U8); - ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(plane1, Format::U8); - ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(plane2, Format::U8); - ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(output, Format::NV12, Format::NV21, Format::IYUV, Format::YUV444); - - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(plane0, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(plane1, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(plane2, 1, DataType::U8); - - const Format output_format = output->info()->format(); - - // Validate shape of Y plane to be even and shape of sub-sampling dimensions for U and V planes - // Perform validation only for formats which require sub-sampling. - if(Format::YUV444 != output_format) - { - // Validate Y plane of input and output - ARM_COMPUTE_ERROR_ON_TENSORS_NOT_EVEN(output_format, plane0, output->plane(0)); - - // Validate U and V plane of the input - ARM_COMPUTE_ERROR_ON_TENSORS_NOT_SUBSAMPLED(output_format, plane0->info()->tensor_shape(), plane1, plane2); - - // Validate second plane U (NV12 and NV21 have a UV88 combined plane while IYUV has only the U plane) - // MultiImage generates the correct tensor shape but also check in case the tensor shape of planes was changed to a wrong size - ARM_COMPUTE_ERROR_ON_TENSORS_NOT_SUBSAMPLED(output_format, plane0->info()->tensor_shape(), output->plane(1)); - - // Validate the last plane V of format IYUV - if(Format::IYUV == output_format) - { - // Validate Y plane of the output - ARM_COMPUTE_ERROR_ON_TENSORS_NOT_SUBSAMPLED(output_format, plane0->info()->tensor_shape(), output->plane(2)); - } - } - - _planes[0] = plane0; - _planes[1] = plane1; - _planes[2] = plane2; - _planes[3] = nullptr; - _output = nullptr; - _output_multi = output; - - bool has_two_planes = false; - unsigned int num_elems_written_plane1 = 8; - - _num_elems_processed_per_iteration = 8; - _is_parallelizable = true; - - switch(output_format) - { - case Format::NV12: - case Format::NV21: - _x_subsampling = { { 1, 2, 2 } }; - _y_subsampling = { { 1, 2, 2 } }; - _func = &NEChannelCombineKernel::combine_YUV_2p; - has_two_planes = true; - num_elems_written_plane1 = 16; - break; - case Format::IYUV: - _is_parallelizable = false; - _x_subsampling = { { 1, 2, 2 } }; - _y_subsampling = { { 1, 2, 2 } }; - _func = &NEChannelCombineKernel::combine_YUV_3p; - break; - case Format::YUV444: - _is_parallelizable = false; - _x_subsampling = { { 1, 1, 1 } }; - _y_subsampling = { { 1, 1, 1 } }; - _func = &NEChannelCombineKernel::combine_YUV_3p; - break; - default: - ARM_COMPUTE_ERROR("Not supported format."); - break; - } - - const unsigned int y_step = *std::max_element(_y_subsampling.begin(), _y_subsampling.end()); - - Window win = calculate_max_window(*plane0->info(), Steps(_num_elems_processed_per_iteration, y_step)); - AccessWindowRectangle output_plane0_access(output->plane(0)->info(), 0, 0, _num_elems_processed_per_iteration, 1, 1.f, 1.f / _y_subsampling[0]); - AccessWindowRectangle output_plane1_access(output->plane(1)->info(), 0, 0, num_elems_written_plane1, 1, 1.f / _x_subsampling[1], 1.f / _y_subsampling[1]); - AccessWindowRectangle output_plane2_access(has_two_planes ? nullptr : output->plane(2)->info(), 0, 0, _num_elems_processed_per_iteration, 1, 1.f / _x_subsampling[2], 1.f / _y_subsampling[2]); - - update_window_and_padding(win, - AccessWindowHorizontal(plane0->info(), 0, _num_elems_processed_per_iteration), - AccessWindowRectangle(plane1->info(), 0, 0, _num_elems_processed_per_iteration, 1, 1.f / _x_subsampling[1], 1.f / _y_subsampling[1]), - AccessWindowRectangle(plane2->info(), 0, 0, _num_elems_processed_per_iteration, 1, 1.f / _x_subsampling[2], 1.f / _y_subsampling[2]), - output_plane0_access, - output_plane1_access, - output_plane2_access); - - ValidRegion plane0_valid_region = plane0->info()->valid_region(); - ValidRegion output_plane1_region = has_two_planes ? intersect_valid_regions(plane1->info()->valid_region(), plane2->info()->valid_region()) : plane2->info()->valid_region(); - - output_plane0_access.set_valid_region(win, ValidRegion(plane0_valid_region.anchor, output->plane(0)->info()->tensor_shape())); - output_plane1_access.set_valid_region(win, ValidRegion(output_plane1_region.anchor, output->plane(1)->info()->tensor_shape())); - output_plane2_access.set_valid_region(win, ValidRegion(plane2->info()->valid_region().anchor, output->plane(2)->info()->tensor_shape())); - - INEKernel::configure(win); -} - -bool NEChannelCombineKernel::is_parallelisable() const -{ - return _is_parallelizable; -} - -void NEChannelCombineKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - ARM_COMPUTE_ERROR_ON(_func == nullptr); - - (this->*_func)(window); -} - -void NEChannelCombineKernel::combine_3C(const Window &win) -{ - Iterator p0(_planes[0], win); - Iterator p1(_planes[1], win); - Iterator p2(_planes[2], win); - Iterator out(_output, win); - - execute_window_loop(win, [&](const Coordinates &) - { - const auto p0_ptr = static_cast(p0.ptr()); - const auto p1_ptr = static_cast(p1.ptr()); - const auto p2_ptr = static_cast(p2.ptr()); - const auto out_ptr = static_cast(out.ptr()); - - const uint8x8x3_t pixels = - { - { - vld1_u8(p0_ptr), - vld1_u8(p1_ptr), - vld1_u8(p2_ptr) - } - }; - - vst3_u8(out_ptr, pixels); - }, - p0, p1, p2, out); -} - -void NEChannelCombineKernel::combine_4C(const Window &win) -{ - Iterator p0(_planes[0], win); - Iterator p1(_planes[1], win); - Iterator p2(_planes[2], win); - Iterator p3(_planes[3], win); - Iterator out(_output, win); - - execute_window_loop(win, [&](const Coordinates &) - { - const auto p0_ptr = static_cast(p0.ptr()); - const auto p1_ptr = static_cast(p1.ptr()); - const auto p2_ptr = static_cast(p2.ptr()); - const auto p3_ptr = static_cast(p3.ptr()); - const auto out_ptr = static_cast(out.ptr()); - - const uint8x8x4_t pixels = - { - { - vld1_u8(p0_ptr), - vld1_u8(p1_ptr), - vld1_u8(p2_ptr), - vld1_u8(p3_ptr) - } - }; - - vst4_u8(out_ptr, pixels); - }, - p0, p1, p2, p3, out); -} - -template -void NEChannelCombineKernel::combine_YUV_1p(const Window &win) -{ - // Create sub-sampled uv window and init uv planes - Window win_uv(win); - win_uv.set_dimension_step(Window::DimX, win.x().step() / _x_subsampling[1]); - win_uv.validate(); - - Iterator p0(_planes[0], win); - Iterator p1(_planes[1], win_uv); - Iterator p2(_planes[2], win_uv); - Iterator out(_output, win); - - constexpr auto shift = is_uyvy ? 1 : 0; - - execute_window_loop(win, [&](const Coordinates &) - { - const auto p0_ptr = static_cast(p0.ptr()); - const auto p1_ptr = static_cast(p1.ptr()); - const auto p2_ptr = static_cast(p2.ptr()); - const auto out_ptr = static_cast(out.ptr()); - - const uint8x8x2_t pixels_y = vld2_u8(p0_ptr); - const uint8x8x2_t pixels_uv = - { - { - vld1_u8(p1_ptr), - vld1_u8(p2_ptr) - } - }; - - uint8x8x4_t pixels{ {} }; - pixels.val[0 + shift] = pixels_y.val[0]; - pixels.val[1 - shift] = pixels_uv.val[0]; - pixels.val[2 + shift] = pixels_y.val[1]; - pixels.val[3 - shift] = pixels_uv.val[1]; - - vst4_u8(out_ptr, pixels); - }, - p0, p1, p2, out); -} - -void NEChannelCombineKernel::combine_YUV_2p(const Window &win) -{ - ARM_COMPUTE_ERROR_ON(win.x().start() % _x_subsampling[1]); - ARM_COMPUTE_ERROR_ON(win.y().start() % _y_subsampling[1]); - - // Copy first plane - copy_plane(win, 0); - - // Update UV window - Window uv_win(win); - uv_win.set(Window::DimX, Window::Dimension(uv_win.x().start() / _x_subsampling[1], uv_win.x().end() / _x_subsampling[1], uv_win.x().step() / _x_subsampling[1])); - uv_win.set(Window::DimY, Window::Dimension(uv_win.y().start() / _y_subsampling[1], uv_win.y().end() / _y_subsampling[1], 1)); - uv_win.validate(); - - // Update output win - Window out_win(win); - out_win.set(Window::DimX, Window::Dimension(out_win.x().start(), out_win.x().end(), out_win.x().step() / _x_subsampling[1])); - out_win.set(Window::DimY, Window::Dimension(out_win.y().start() / _y_subsampling[1], out_win.y().end() / _y_subsampling[1], 1)); - out_win.validate(); - - // Construct second plane - const int shift = (Format::NV12 == _output_multi->info()->format()) ? 0 : 1; - Iterator p1(_planes[1 + shift], uv_win); - Iterator p2(_planes[2 - shift], uv_win); - Iterator out(_output_multi->plane(1), out_win); - - // Increase step size after iterator is created to calculate stride correctly for multi channel format - out_win.set_dimension_step(Window::DimX, out_win.x().step() * _x_subsampling[1]); - - execute_window_loop(out_win, [&](const Coordinates &) - { - const uint8x8x2_t pixels = - { - { - vld1_u8(p1.ptr()), - vld1_u8(p2.ptr()) - } - }; - - vst2_u8(out.ptr(), pixels); - }, - p1, p2, out); -} - -void NEChannelCombineKernel::combine_YUV_3p(const Window &win) -{ - copy_plane(win, 0); - copy_plane(win, 1); - copy_plane(win, 2); -} - -void NEChannelCombineKernel::copy_plane(const Window &win, uint32_t plane_id) -{ - ARM_COMPUTE_ERROR_ON(win.x().start() % _x_subsampling[plane_id]); - ARM_COMPUTE_ERROR_ON(win.y().start() % _y_subsampling[plane_id]); - - // Update window - Window tmp_win(win); - tmp_win.set(Window::DimX, Window::Dimension(tmp_win.x().start() / _x_subsampling[plane_id], tmp_win.x().end() / _x_subsampling[plane_id], tmp_win.x().step() / _x_subsampling[plane_id])); - tmp_win.set(Window::DimY, Window::Dimension(tmp_win.y().start() / _y_subsampling[plane_id], tmp_win.y().end() / _y_subsampling[plane_id], 1)); - - Iterator in(_planes[plane_id], tmp_win); - Iterator out(_output_multi->plane(plane_id), tmp_win); - - execute_window_loop(tmp_win, [&](const Coordinates &) - { - const uint8x8_t pixels = vld1_u8(in.ptr()); - - vst1_u8(out.ptr(), pixels); - }, - in, out); -} diff --git a/src/core/NEON/kernels/NEChannelCombineKernel.h b/src/core/NEON/kernels/NEChannelCombineKernel.h deleted file mode 100644 index a3372be4d2..0000000000 --- a/src/core/NEON/kernels/NEChannelCombineKernel.h +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NECHANNELCOMBINEKERNEL_H -#define ARM_COMPUTE_NECHANNELCOMBINEKERNEL_H - -#include "src/core/NEON/INEKernel.h" - -#include -#include - -namespace arm_compute -{ -class IMultiImage; -class ITensor; -using IImage = ITensor; - -/** Interface for the channel combine kernel */ -class NEChannelCombineKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEChannelCombineKernel"; - } - /** Default constructor */ - NEChannelCombineKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEChannelCombineKernel(const NEChannelCombineKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEChannelCombineKernel &operator=(const NEChannelCombineKernel &) = delete; - /** Allow instances of this class to be moved */ - NEChannelCombineKernel(NEChannelCombineKernel &&) = default; - /** Allow instances of this class to be moved */ - NEChannelCombineKernel &operator=(NEChannelCombineKernel &&) = default; - /** Default destructor */ - ~NEChannelCombineKernel() = default; - - /** Configure function's inputs and outputs. - * - * @param[in] plane0 The 2D plane that forms channel 0. Data type supported: U8 - * @param[in] plane1 The 2D plane that forms channel 1. Data type supported: U8 - * @param[in] plane2 The 2D plane that forms channel 2. Data type supported: U8 - * @param[in] plane3 The 2D plane that forms channel 3. Data type supported: U8 - * @param[out] output The single planar output tensor. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422 - */ - void configure(const ITensor *plane0, const ITensor *plane1, const ITensor *plane2, const ITensor *plane3, ITensor *output); - /** Configure function's inputs and outputs. - * - * @param[in] plane0 The 2D plane that forms channel 0. Data type supported: U8 - * @param[in] plane1 The 2D plane that forms channel 1. Data type supported: U8 - * @param[in] plane2 The 2D plane that forms channel 2. Data type supported: U8 - * @param[out] output The multi planar output tensor. Formats supported: NV12/NV21/IYUV/YUV444 - */ - void configure(const IImage *plane0, const IImage *plane1, const IImage *plane2, IMultiImage *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - bool is_parallelisable() const override; - -private: - /** Combine 3 planes to form a three channel single plane tensor. - * - * @param[in] win Region on which to execute the kernel. - */ - void combine_3C(const Window &win); - /** Combine 4 planes to form a four channel single plane tensor. - * - * @param[in] win Region on which to execute the kernel. - */ - void combine_4C(const Window &win); - /** Combine 3 planes to form a single plane YUV tensor. - * - * @param[in] win Region on which to execute the kernel. - */ - template - void combine_YUV_1p(const Window &win); - /** Combine 3 planes to form a two plane YUV tensor. - * - * @param[in] win Region on which to execute the kernel. - */ - void combine_YUV_2p(const Window &win); - /** Combine 3 planes to form a three plane YUV tensor. - * - * @param[in] win Region on which to execute the kernel. - */ - void combine_YUV_3p(const Window &win); - /** Copies a full plane to the output tensor. - * - * @param[in] win Region on which to execute the kernel. - */ - void copy_plane(const Window &win, uint32_t plane_id); - /** Common signature for all the specialised ChannelCombine functions - * - * @param[in] window Region on which to execute the kernel. - */ - using ChannelCombineFunction = void (NEChannelCombineKernel::*)(const Window &window); - /** ChannelCombine function to use for the particular tensor types passed to configure() */ - ChannelCombineFunction _func; - std::array _planes; - ITensor *_output; - IMultiImage *_output_multi; - std::array _x_subsampling; - std::array _y_subsampling; - unsigned int _num_elems_processed_per_iteration; - bool _is_parallelizable; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NECHANNELCOMBINEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEChannelExtractKernel.cpp b/src/core/NEON/kernels/NEChannelExtractKernel.cpp deleted file mode 100644 index d0d1c6852f..0000000000 --- a/src/core/NEON/kernels/NEChannelExtractKernel.cpp +++ /dev/null @@ -1,269 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/NEON/kernels/NEChannelExtractKernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/IAccessWindow.h" -#include "arm_compute/core/IMultiImage.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/MultiImageInfo.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" -#include "src/core/NEON/INEKernel.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" - -#include - -using namespace arm_compute; - -namespace arm_compute -{ -class Coordinates; -} // namespace arm_compute - -NEChannelExtractKernel::NEChannelExtractKernel() - : _func(nullptr), _lut_index(0) -{ -} - -void NEChannelExtractKernel::configure(const ITensor *input, Channel channel, ITensor *output) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_ON(input == output); - - set_format_if_unknown(*output->info(), Format::U8); - - // Check if input tensor has a valid format - ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(input, Format::RGB888, Format::RGBA8888, Format::UYVY422, Format::YUYV422); - ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(output, Format::U8); - - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input); - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(output); - - // Check if channel is valid for given format - const Format format = input->info()->format(); - ARM_COMPUTE_ERROR_ON_CHANNEL_NOT_IN_KNOWN_FORMAT(format, channel); - - unsigned int subsampling = 1; - - if(format == Format::YUYV422 || format == Format::UYVY422) - { - // Check if the width of the tensor shape is even for formats with subsampled channels (UYVY422 and YUYV422) - ARM_COMPUTE_ERROR_ON_TENSORS_NOT_EVEN(format, input); - - if(channel != Channel::Y) - { - subsampling = 2; - } - } - - TensorShape output_shape = calculate_subsampled_shape(input->info()->tensor_shape(), format, channel); - set_shape_if_empty(*output->info(), output_shape); - - ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output_shape, output->info()->tensor_shape()); - - _input = input; - _output = output; - _lut_index = channel_idx_from_format(format, channel); - - unsigned int num_elems_processed_per_iteration = 16; - - if(format == Format::YUYV422 || format == Format::UYVY422) - { - _func = &NEChannelExtractKernel::extract_1C_from_2C_img; - - if(channel != Channel::Y) // Channel::U or Channel::V - { - num_elems_processed_per_iteration = 32; - _func = &NEChannelExtractKernel::extract_YUYV_uv; - } - } - else // Format::RGB888 or Format::RGBA8888 - { - _func = &NEChannelExtractKernel::extract_1C_from_3C_img; - - if(format == Format::RGBA8888) - { - _func = &NEChannelExtractKernel::extract_1C_from_4C_img; - } - } - - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); - - AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration); - AccessWindowRectangle output_access(output->info(), 0, 0, num_elems_processed_per_iteration, 1, 1.f / subsampling, 1.f / subsampling); - update_window_and_padding(win, input_access, output_access); - - ValidRegion input_valid_region = input->info()->valid_region(); - output_access.set_valid_region(win, ValidRegion(input_valid_region.anchor, output->info()->tensor_shape())); - - INEKernel::configure(win); -} - -void NEChannelExtractKernel::configure(const IMultiImage *input, Channel channel, IImage *output) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(output); - - set_format_if_unknown(*output->info(), Format::U8); - - const Format format = input->info()->format(); - ARM_COMPUTE_ERROR_ON_CHANNEL_NOT_IN_KNOWN_FORMAT(format, channel); - - // Get input plane - const IImage *input_plane = input->plane(plane_idx_from_channel(format, channel)); - ARM_COMPUTE_ERROR_ON_NULLPTR(input_plane); - - if(Channel::Y == channel && format != Format::YUV444) - { - // Check if the width of the tensor shape is even for formats with subsampled channels (UYVY422 and YUYV422) - ARM_COMPUTE_ERROR_ON_TENSORS_NOT_EVEN(format, input_plane); - } - - // Calculate 2x2 subsampled tensor shape - TensorShape output_shape = calculate_subsampled_shape(input->plane(0)->info()->tensor_shape(), format, channel); - set_shape_if_empty(*output->info(), output_shape); - - ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output_shape, output->info()->tensor_shape()); - - // Check if input tensor has a valid format - ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(input, Format::NV12, Format::NV21, Format::IYUV, Format::YUV444); - ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(output, Format::U8); - - _input = input_plane; - _output = output; - _lut_index = channel_idx_from_format(format, channel); - - unsigned int num_elems_processed_per_iteration = 32; - - _func = &NEChannelExtractKernel::copy_plane; - - if((format == Format::NV12 || format == Format::NV21) && channel != Channel::Y) - { - num_elems_processed_per_iteration = 16; - _func = &NEChannelExtractKernel::extract_1C_from_2C_img; - } - - Window win = calculate_max_window(*_input->info(), Steps(num_elems_processed_per_iteration)); - - AccessWindowHorizontal input_access(_input->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - update_window_and_padding(win, input_access, output_access); - output_access.set_valid_region(win, _input->info()->valid_region()); - - INEKernel::configure(win); -} - -void NEChannelExtractKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INESimpleKernel::window(), window); - ARM_COMPUTE_ERROR_ON(_func == nullptr); - - (this->*_func)(window); -} - -void NEChannelExtractKernel::extract_1C_from_2C_img(const Window &win) -{ - Iterator in(_input, win); - Iterator out(_output, win); - - execute_window_loop(win, [&](const Coordinates &) - { - const auto in_ptr = static_cast(in.ptr()); - const auto out_ptr = static_cast(out.ptr()); - const auto pixels = vld2q_u8(in_ptr); - vst1q_u8(out_ptr, pixels.val[_lut_index]); - }, - in, out); -} - -void NEChannelExtractKernel::extract_1C_from_3C_img(const Window &win) -{ - Iterator in(_input, win); - Iterator out(_output, win); - - execute_window_loop(win, [&](const Coordinates &) - { - const auto in_ptr = static_cast(in.ptr()); - const auto out_ptr = static_cast(out.ptr()); - const auto pixels = vld3q_u8(in_ptr); - vst1q_u8(out_ptr, pixels.val[_lut_index]); - }, - in, out); -} - -void NEChannelExtractKernel::extract_1C_from_4C_img(const Window &win) -{ - Iterator in(_input, win); - Iterator out(_output, win); - - execute_window_loop(win, [&](const Coordinates &) - { - const auto in_ptr = static_cast(in.ptr()); - const auto out_ptr = static_cast(out.ptr()); - const auto pixels = vld4q_u8(in_ptr); - vst1q_u8(out_ptr, pixels.val[_lut_index]); - }, - in, out); -} - -void NEChannelExtractKernel::extract_YUYV_uv(const Window &win) -{ - ARM_COMPUTE_ERROR_ON(win.x().step() % 2); - - Window win_out(win); - win_out.set_dimension_step(Window::DimX, win.x().step() / 2); - - Iterator in(_input, win); - Iterator out(_output, win_out); - - execute_window_loop(win, [&](const Coordinates &) - { - const auto in_ptr = static_cast(in.ptr()); - const auto out_ptr = static_cast(out.ptr()); - const auto pixels = vld4q_u8(in_ptr); - vst1q_u8(out_ptr, pixels.val[_lut_index]); - }, - in, out); -} - -void NEChannelExtractKernel::copy_plane(const Window &win) -{ - Iterator in(_input, win); - Iterator out(_output, win); - - execute_window_loop(win, [&](const Coordinates &) - { - const auto in_ptr = static_cast(in.ptr()); - const auto out_ptr = static_cast(out.ptr()); - vst4_u8(out_ptr, vld4_u8(in_ptr)); - }, - in, out); -} diff --git a/src/core/NEON/kernels/NEChannelExtractKernel.h b/src/core/NEON/kernels/NEChannelExtractKernel.h deleted file mode 100644 index 0b2847d79c..0000000000 --- a/src/core/NEON/kernels/NEChannelExtractKernel.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NECHANNELEXTRACTKERNEL_H -#define ARM_COMPUTE_NECHANNELEXTRACTKERNEL_H - -#include "arm_compute/core/Types.h" -#include "src/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -class IMultiImage; -class ITensor; -using IImage = ITensor; - -/** Interface for the channel extract kernel */ -class NEChannelExtractKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEChannelExtractKernel"; - } - /** Default constructor */ - NEChannelExtractKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEChannelExtractKernel(const NEChannelExtractKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEChannelExtractKernel &operator=(const NEChannelExtractKernel &) = delete; - /** Allow instances of this class to be moved */ - NEChannelExtractKernel(NEChannelExtractKernel &&) = default; - /** Allow instances of this class to be moved */ - NEChannelExtractKernel &operator=(NEChannelExtractKernel &&) = default; - /** Default destructor */ - ~NEChannelExtractKernel() = default; - - /** Set the input and output of the kernel - * - * @param[in] input Source tensor. Formats supported: RGB888/RGBA8888/YUYV422/UYVY422 - * @param[in] channel Channel to extract. - * @param[out] output Destination tensor. Format supported: U8 - */ - void configure(const ITensor *input, Channel channel, ITensor *output); - /** Set the input and output of the kernel - * - * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV/YUV444 - * @param[in] channel Channel to extract. - * @param[out] output Single-planar destination image. Format supported: U8 - */ - void configure(const IMultiImage *input, Channel channel, IImage *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Extract one channel from a two channel planar tensor. - * - * @param[in] win Region on which to execute the kernel. - */ - void extract_1C_from_2C_img(const Window &win); - /** Extract one channel from a three channel planar tensor. - * - * @param[in] win Region on which to execute the kernel. - */ - void extract_1C_from_3C_img(const Window &win); - /** Extract one channel from a four channel planar tensor. - * - * @param[in] win Region on which to execute the kernel. - */ - void extract_1C_from_4C_img(const Window &win); - /** Extract U/V channel from a single planar YUVY/UYVY tensor. - * - * @param[in] win Region on which to execute the kernel. - */ - void extract_YUYV_uv(const Window &win); - /** Copies a full plane to the output tensor. - * - * @param[in] win Region on which to execute the kernel. - */ - void copy_plane(const Window &win); - /** Common signature for all the specialised ChannelExtract functions - * - * @param[in] window Region on which to execute the kernel. - */ - using ChannelExtractFunction = void (NEChannelExtractKernel::*)(const Window &window); - /** ChannelExtract function to use for the particular tensor types passed to configure() */ - ChannelExtractFunction _func; - unsigned int _lut_index; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NECHANNELEXTRACTKERNEL_H */ diff --git a/src/core/NEON/kernels/NEColorConvertKernel.cpp b/src/core/NEON/kernels/NEColorConvertKernel.cpp deleted file mode 100644 index 23270d42d1..0000000000 --- a/src/core/NEON/kernels/NEColorConvertKernel.cpp +++ /dev/null @@ -1,590 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/NEON/kernels/NEColorConvertKernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/IAccessWindow.h" -#include "arm_compute/core/IMultiImage.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/MultiImageInfo.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" - -#include "src/core/NEON/kernels/detail/NEColorConvertHelper.inl" - -using namespace arm_compute; - -NEColorConvertKernel::NEColorConvertKernel() - : _input(nullptr), _output(nullptr), _func(nullptr) -{ -} - -void NEColorConvertKernel::configure(const ITensor *input, ITensor *output) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - - set_shape_if_empty(*output->info(), input->info()->tensor_shape()); - - ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output); - - unsigned int num_elems_processed_per_iteration = 0; - - switch(input->info()->format()) - { - case Format::RGBA8888: - { - switch(output->info()->format()) - { - case Format::RGB888: - _func = colorconvert_rgbx_to_rgb; - num_elems_processed_per_iteration = 16; - break; - default: - ARM_COMPUTE_ERROR("Not supported"); - break; - } - break; - } - case Format::UYVY422: - { - switch(output->info()->format()) - { - case Format::RGB888: - _func = colorconvert_yuyv_to_rgb; - num_elems_processed_per_iteration = 32; - break; - case Format::RGBA8888: - _func = colorconvert_yuyv_to_rgb; - num_elems_processed_per_iteration = 32; - break; - default: - ARM_COMPUTE_ERROR("Not supported"); - break; - } - break; - } - case Format::YUYV422: - { - switch(output->info()->format()) - { - case Format::RGB888: - _func = colorconvert_yuyv_to_rgb; - num_elems_processed_per_iteration = 32; - break; - case Format::RGBA8888: - _func = colorconvert_yuyv_to_rgb; - num_elems_processed_per_iteration = 32; - break; - default: - ARM_COMPUTE_ERROR("Not supported"); - break; - } - break; - } - case Format::RGB888: - { - switch(output->info()->format()) - { - case Format::RGBA8888: - _func = colorconvert_rgb_to_rgbx; - num_elems_processed_per_iteration = 16; - break; - case Format::U8: - _func = colorconvert_rgb_to_u8; - num_elems_processed_per_iteration = 16; - break; - default: - ARM_COMPUTE_ERROR("Not supported"); - break; - } - break; - } - default: - ARM_COMPUTE_ERROR("Not supported"); - break; - } - - _input = input; - _output = output; - - // Configure kernel window - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); - AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - - update_window_and_padding(win, input_access, output_access); - - output_access.set_valid_region(win, input->info()->valid_region()); - - INEKernel::configure(win); -} - -void NEColorConvertKernel::configure(const IMultiImage *input, IImage *output) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(output); - - set_shape_if_empty(*output->info(), input->plane(0)->info()->tensor_shape()); - - ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input->plane(0), output); - - unsigned int num_elems_processed_per_iteration = 0; - - switch(input->info()->format()) - { - case Format::NV12: - { - switch(output->info()->format()) - { - case Format::RGB888: - _func = colorconvert_nv12_to_rgb; - num_elems_processed_per_iteration = 32; - break; - case Format::RGBA8888: - _func = colorconvert_nv12_to_rgb; - num_elems_processed_per_iteration = 32; - break; - default: - ARM_COMPUTE_ERROR("Not supported"); - break; - } - break; - } - case Format::NV21: - { - switch(output->info()->format()) - { - case Format::RGB888: - _func = colorconvert_nv12_to_rgb; - num_elems_processed_per_iteration = 32; - break; - case Format::RGBA8888: - _func = colorconvert_nv12_to_rgb; - num_elems_processed_per_iteration = 32; - break; - default: - ARM_COMPUTE_ERROR("Not supported"); - break; - } - break; - } - case Format::IYUV: - { - switch(output->info()->format()) - { - case Format::RGB888: - _func = colorconvert_iyuv_to_rgb; - num_elems_processed_per_iteration = 32; - break; - case Format::RGBA8888: - _func = colorconvert_iyuv_to_rgb; - num_elems_processed_per_iteration = 32; - break; - default: - ARM_COMPUTE_ERROR("Not supported"); - break; - } - break; - } - default: - ARM_COMPUTE_ERROR("Not supported"); - break; - } - - _input = input; - _output = output; - - // Configure kernel window - Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration)); - win.set_dimension_step(Window::DimY, 2); - - unsigned int input_plane_count = 3; - - if(input->info()->format() == Format::NV12 || input->info()->format() == Format::NV21) - { - input_plane_count = 2; - } - - AccessWindowHorizontal input0_access(input->plane(0)->info(), 0, num_elems_processed_per_iteration); - AccessWindowRectangle input1_access(input->plane(1)->info(), 0, 0, num_elems_processed_per_iteration, 1, 0.5f, 0.5f); - AccessWindowRectangle input2_access(input_plane_count == 2 ? nullptr : input->plane(2)->info(), 0, 0, num_elems_processed_per_iteration, 1, 0.5f, 0.5f); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - - update_window_and_padding(win, - input0_access, input1_access, input2_access, - output_access); - - ValidRegion intersect_region = intersect_valid_regions(input->plane(0)->info()->valid_region(), - input->plane(1)->info()->valid_region()); - - if(input_plane_count == 3) - { - intersect_region = intersect_valid_regions(intersect_region, input->plane(2)->info()->valid_region()); - } - - output_access.set_valid_region(win, intersect_region); - - INEKernel::configure(win); -} - -void NEColorConvertKernel::configure(const IImage *input, IMultiImage *output) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input); - - set_shape_if_empty(*output->plane(0)->info(), input->info()->tensor_shape()); - - switch(output->info()->format()) - { - case Format::NV12: - { - TensorShape subsampled_shape = input->info()->tensor_shape(); - subsampled_shape.set(0, subsampled_shape[0] / 2); - subsampled_shape.set(1, subsampled_shape[1] / 2); - - set_shape_if_empty(*output->plane(1)->info(), subsampled_shape); - - ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(subsampled_shape, output->plane(1)->info()->tensor_shape()); - break; - } - case Format::IYUV: - { - TensorShape subsampled_shape = input->info()->tensor_shape(); - subsampled_shape.set(0, subsampled_shape[0] / 2); - subsampled_shape.set(1, subsampled_shape[1] / 2); - - set_shape_if_empty(*output->plane(1)->info(), subsampled_shape); - set_shape_if_empty(*output->plane(2)->info(), subsampled_shape); - - ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(subsampled_shape, output->plane(1)->info()->tensor_shape()); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(subsampled_shape, output->plane(2)->info()->tensor_shape()); - break; - } - case Format::YUV444: - set_shape_if_empty(*output->plane(1)->info(), input->info()->tensor_shape()); - set_shape_if_empty(*output->plane(2)->info(), input->info()->tensor_shape()); - - ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output->plane(1)); - ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output->plane(2)); - break; - default: - ARM_COMPUTE_ERROR("Not supported"); - } - - ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output->plane(0)); - - unsigned int num_elems_processed_per_iteration = 0; - - switch(input->info()->format()) - { - case Format::RGB888: - { - switch(output->info()->format()) - { - case Format::NV12: - _func = colorconvert_rgb_to_nv12; - num_elems_processed_per_iteration = 16; - break; - case Format::IYUV: - _func = colorconvert_rgb_to_iyuv; - num_elems_processed_per_iteration = 16; - break; - case Format::YUV444: - _func = colorconvert_rgb_to_yuv4; - num_elems_processed_per_iteration = 16; - break; - default: - ARM_COMPUTE_ERROR("Not supported"); - break; - } - break; - } - case Format::RGBA8888: - { - switch(output->info()->format()) - { - case Format::NV12: - _func = colorconvert_rgb_to_nv12; - num_elems_processed_per_iteration = 16; - break; - case Format::IYUV: - _func = colorconvert_rgb_to_iyuv; - num_elems_processed_per_iteration = 16; - break; - case Format::YUV444: - _func = colorconvert_rgb_to_yuv4; - num_elems_processed_per_iteration = 16; - break; - default: - ARM_COMPUTE_ERROR("Not supported"); - break; - } - break; - } - case Format::UYVY422: - { - switch(output->info()->format()) - { - case Format::NV12: - _func = colorconvert_yuyv_to_nv12; - num_elems_processed_per_iteration = 32; - break; - case Format::IYUV: - _func = colorconvert_yuyv_to_iyuv; - num_elems_processed_per_iteration = 32; - break; - default: - ARM_COMPUTE_ERROR("Not supported"); - break; - } - break; - } - case Format::YUYV422: - { - switch(output->info()->format()) - { - case Format::NV12: - _func = colorconvert_yuyv_to_nv12; - num_elems_processed_per_iteration = 32; - break; - case Format::IYUV: - _func = colorconvert_yuyv_to_iyuv; - num_elems_processed_per_iteration = 32; - break; - default: - ARM_COMPUTE_ERROR("Not supported"); - break; - } - break; - } - default: - ARM_COMPUTE_ERROR("Not supported"); - break; - } - - _input = input; - _output = output; - - // Configure kernel window - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); - - float sub_sampling = 1.f; - - if((input->info()->format() != Format::RGB888 || output->info()->format() != Format::YUV444) && (input->info()->format() != Format::RGBA8888 || output->info()->format() != Format::YUV444)) - { - win.set_dimension_step(Window::DimY, 2); - sub_sampling = 0.5f; - } - - unsigned int output_plane_count = 3; - - if(output->info()->format() == Format::NV12 || output->info()->format() == Format::NV21) - { - output_plane_count = 2; - } - - AccessWindowHorizontal output0_access(output->plane(0)->info(), 0, num_elems_processed_per_iteration); - AccessWindowRectangle output1_access(output->plane(1)->info(), 0, 0, num_elems_processed_per_iteration, 1, sub_sampling, sub_sampling); - AccessWindowRectangle output2_access(output_plane_count == 2 ? nullptr : output->plane(2)->info(), 0, 0, num_elems_processed_per_iteration, 1, sub_sampling, sub_sampling); - - update_window_and_padding(win, - AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration), - output0_access, - output1_access, - output2_access); - - output0_access.set_valid_region(win, input->info()->valid_region()); - output1_access.set_valid_region(win, input->info()->valid_region()); - output2_access.set_valid_region(win, input->info()->valid_region()); - - INEKernel::configure(win); -} - -void NEColorConvertKernel::configure(const IMultiImage *input, IMultiImage *output) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_ON(input == output); - - set_shape_if_empty(*output->plane(0)->info(), input->plane(0)->info()->tensor_shape()); - - switch(output->info()->format()) - { - case Format::NV12: - { - TensorShape subsampled_shape = input->plane(0)->info()->tensor_shape(); - subsampled_shape.set(0, subsampled_shape[0] / 2); - subsampled_shape.set(1, subsampled_shape[1] / 2); - - set_shape_if_empty(*output->plane(1)->info(), subsampled_shape); - - ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(subsampled_shape, output->plane(1)->info()->tensor_shape()); - break; - } - case Format::IYUV: - { - TensorShape subsampled_shape = input->plane(0)->info()->tensor_shape(); - subsampled_shape.set(0, subsampled_shape[0] / 2); - subsampled_shape.set(1, subsampled_shape[1] / 2); - - set_shape_if_empty(*output->plane(1)->info(), subsampled_shape); - set_shape_if_empty(*output->plane(2)->info(), subsampled_shape); - - ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(subsampled_shape, output->plane(1)->info()->tensor_shape()); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(subsampled_shape, output->plane(2)->info()->tensor_shape()); - break; - } - case Format::YUV444: - set_shape_if_empty(*output->plane(1)->info(), input->plane(0)->info()->tensor_shape()); - set_shape_if_empty(*output->plane(2)->info(), input->plane(0)->info()->tensor_shape()); - - ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input->plane(0), output->plane(1)); - ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input->plane(0), output->plane(2)); - break; - default: - ARM_COMPUTE_ERROR("Not supported"); - } - - ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input->plane(0), output->plane(0)); - - switch(input->info()->format()) - { - case Format::NV12: - { - switch(output->info()->format()) - { - case Format::IYUV: - _func = colorconvert_nv12_to_iyuv; - break; - case Format::YUV444: - _func = colorconvert_nv12_to_yuv4; - break; - default: - ARM_COMPUTE_ERROR("Not supported"); - break; - } - break; - } - case Format::NV21: - { - switch(output->info()->format()) - { - case Format::IYUV: - _func = colorconvert_nv12_to_iyuv; - break; - case Format::YUV444: - _func = colorconvert_nv12_to_yuv4; - break; - default: - ARM_COMPUTE_ERROR("Not supported"); - break; - } - break; - } - case Format::IYUV: - { - switch(output->info()->format()) - { - case Format::NV12: - _func = colorconvert_iyuv_to_nv12; - break; - case Format::YUV444: - _func = colorconvert_iyuv_to_yuv4; - break; - default: - ARM_COMPUTE_ERROR("Not supported"); - break; - } - break; - } - default: - ARM_COMPUTE_ERROR("Not supported"); - break; - } - - _input = input; - _output = output; - - constexpr unsigned int num_elems_processed_per_iteration = 32; - constexpr float input_sub_sampling = 0.5f; - const float output_sub_sampling = output->info()->format() == Format::YUV444 ? 1.f : 0.5f; - - // Configure kernel window - Window win = calculate_max_window(*input->plane(0)->info(), Steps(num_elems_processed_per_iteration)); - win.set_dimension_step(Window::DimY, 2); - - unsigned int input_plane_count = 3; - - if(input->info()->format() == Format::NV12 || input->info()->format() == Format::NV21) - { - input_plane_count = 2; - } - - unsigned int output_plane_count = 3; - - if(output->info()->format() == Format::NV12 || output->info()->format() == Format::NV21) - { - output_plane_count = 2; - } - - AccessWindowHorizontal output0_access(output->plane(0)->info(), 0, num_elems_processed_per_iteration); - AccessWindowRectangle output1_access(output->plane(1)->info(), 0, 0, num_elems_processed_per_iteration, 1, output_sub_sampling, output_sub_sampling); - AccessWindowRectangle output2_access(output_plane_count == 2 ? nullptr : output->plane(2)->info(), 0, 0, num_elems_processed_per_iteration, 1, output_sub_sampling, output_sub_sampling); - - update_window_and_padding(win, - AccessWindowHorizontal(input->plane(0)->info(), 0, num_elems_processed_per_iteration), - AccessWindowRectangle(input->plane(1)->info(), 0, 0, num_elems_processed_per_iteration, 1, input_sub_sampling, input_sub_sampling), - AccessWindowRectangle(input_plane_count == 2 ? nullptr : input->plane(2)->info(), 0, 0, num_elems_processed_per_iteration, 1, input_sub_sampling, input_sub_sampling), - output0_access, - output1_access, - output2_access); - - ValidRegion intersect_region = intersect_valid_regions(input->plane(0)->info()->valid_region(), - input->plane(1)->info()->valid_region()); - - if(input_plane_count == 3) - { - intersect_region = intersect_valid_regions(intersect_region, input->plane(2)->info()->valid_region()); - } - - output0_access.set_valid_region(win, intersect_region); - output1_access.set_valid_region(win, intersect_region); - output2_access.set_valid_region(win, intersect_region); - - INEKernel::configure(win); -} - -void NEColorConvertKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - ARM_COMPUTE_ERROR_ON(_func == nullptr); - - (*_func)(_input, _output, window); -} diff --git a/src/core/NEON/kernels/NEColorConvertKernel.h b/src/core/NEON/kernels/NEColorConvertKernel.h deleted file mode 100644 index 1adb624aae..0000000000 --- a/src/core/NEON/kernels/NEColorConvertKernel.h +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_COLORCONVERTKERNEL_H -#define ARM_COMPUTE_COLORCONVERTKERNEL_H - -#include "src/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class IMultiImage; -class ITensor; -using IImage = ITensor; - -/** Interface for the color convert kernel */ -class NEColorConvertKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEColorConvertKernel"; - } - /** Default constructor */ - NEColorConvertKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEColorConvertKernel(const NEColorConvertKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEColorConvertKernel &operator=(const NEColorConvertKernel &) = delete; - /** Allow instances of this class to be moved */ - NEColorConvertKernel(NEColorConvertKernel &&) = default; - /** Allow instances of this class to be moved */ - NEColorConvertKernel &operator=(NEColorConvertKernel &&) = default; - /** Default destructor */ - ~NEColorConvertKernel() = default; - - /** Set the input and output of the kernel - * - * @param[in] input Source tensor. Formats supported: RGBA8888/UYVY422/YUYV422/RGB888 - * @param[out] output Destination tensor. Formats supported: RGB888 (if the formats of @p input are RGBA8888/UYVY422/YUYV422), - * RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888/), - * U8 (if the formats of @p input is RGB888) - */ - void configure(const ITensor *input, ITensor *output); - /** Set the input and output of the kernel - * - * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV - * @param[out] output Single-planar destination image. Formats supported: RGB888/RGBA8888 - */ - void configure(const IMultiImage *input, IImage *output); - /** Set the input and output of the kernel - * - * @param[in] input Single-planar source image. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422 - * @param[out] output Multi-planar destination image. Formats supported: NV12/IYUV/YUV444 (if the formats of @p input are RGB888/RGB8888) - */ - void configure(const IImage *input, IMultiImage *output); - /** Set the input and output of the kernel - * - * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV - * @param[out] output Multi-planar destination image. Formats supported: YUV444/IYUV (if the formats of @p input are NV12/NV21)/NV12 (if the format of @p input is IYUV) - */ - void configure(const IMultiImage *input, IMultiImage *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - using ColorConvertFunction = void(const void *__restrict input_ptr, void *__restrict output_ptr, const Window &win); - const void *_input; - void *_output; - ColorConvertFunction *_func; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NECOLORCONVERTKERNEL_H */ diff --git a/src/core/NEON/kernels/NEDerivativeKernel.cpp b/src/core/NEON/kernels/NEDerivativeKernel.cpp deleted file mode 100644 index e5780ea264..0000000000 --- a/src/core/NEON/kernels/NEDerivativeKernel.cpp +++ /dev/null @@ -1,231 +0,0 @@ -/* - * Copyright (c) 2016-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/NEON/kernels/NEDerivativeKernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" - -#include -#include -#include - -using namespace arm_compute; - -namespace arm_compute -{ -class Coordinates; -} // namespace arm_compute - -NEDerivativeKernel::NEDerivativeKernel() - : _func(nullptr), _input(nullptr), _output_x(nullptr), _output_y(nullptr) -{ -} - -BorderSize NEDerivativeKernel::border_size() const -{ - return BorderSize(1); -} - -void NEDerivativeKernel::configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON((output_x == nullptr) && (output_y == nullptr)); - - const bool run_der_x = output_x != nullptr; - const bool run_der_y = output_y != nullptr; - - if(run_der_x) - { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_x, 1, DataType::S16); - } - - if(run_der_y) - { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_y, 1, DataType::S16); - } - - _input = input; - _output_x = output_x; - _output_y = output_y; - - constexpr unsigned int num_elems_processed_per_iteration = 16; - constexpr unsigned int num_rows_read_per_iteration = 3; - - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size()); - - AccessWindowHorizontal out_x_access(output_x == nullptr ? nullptr : output_x->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal out_y_access(output_y == nullptr ? nullptr : output_y->info(), 0, num_elems_processed_per_iteration); - - // TODO(COMPMID-1503) Fix x-access input bug in Neon kernel instead of '+2' - AccessWindowHorizontal in_x_access(input->info(), -border_size().left, num_elems_processed_per_iteration + 2); - AccessWindowRectangle in_y_access(input->info(), 0, -border_size().left, num_elems_processed_per_iteration, num_rows_read_per_iteration); - - // TODO(COMPMID-1503) Fix x-access input bug in Neon kernel instead of '+2' - AccessWindowRectangle in_xy_access(input->info(), -border_size().left, -border_size().top, num_elems_processed_per_iteration + 2, num_rows_read_per_iteration); - - if(run_der_x && run_der_y) - { - _func = &NEDerivativeKernel::derivative_xy; - update_window_and_padding(win, in_xy_access, out_x_access, out_y_access); - out_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - out_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - } - else - { - if(run_der_x) - { - _func = &NEDerivativeKernel::derivative_x; - update_window_and_padding(win, in_x_access, out_x_access); - out_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - } - else if(run_der_y) - { - _func = &NEDerivativeKernel::derivative_y; - update_window_and_padding(win, in_y_access, out_y_access); - out_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - } - else - { - ARM_COMPUTE_ERROR("At least one output must be NOT NULL"); - } - } - - INEKernel::configure(win); -} - -void NEDerivativeKernel::derivative_x(const Window &window) -{ - Iterator in(_input, window); - Iterator out_x(_output_x, window); - - /* Apply 1-D centered point discrete derivative mask ([-1 0 1]) along the X direction */ - execute_window_loop(window, [&](const Coordinates &) - { - /* Load left and right data */ - const uint8x16_t l_data = vld1q_u8(in.ptr() - 1); - const uint8x16_t r_data = vld1q_u8(in.ptr() + 1); - - /* Cast to int16 and perform the subtraction between the right and left data */ - const int16x8_t out0 = vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(r_data))), - vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(l_data)))); - - /* Cast to int16 and perform the subtraction between the right and left data */ - const int16x8_t out1 = vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(r_data))), - vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(l_data)))); - - /* Store result of derivative along the X direction */ - vst1q_s16(reinterpret_cast(out_x.ptr()), out0); - vst1q_s16(reinterpret_cast(out_x.ptr()) + 8, out1); - }, - in, out_x); -} - -void NEDerivativeKernel::derivative_y(const Window &window) -{ - Iterator in(_input, window); - Iterator out_y(_output_y, window); - - const size_t stride = _input->info()->strides_in_bytes()[1]; - - /* Apply 1-D centered point discrete derivative mask ([-1 0 1]^T) along the Y direction */ - execute_window_loop(window, [&](const Coordinates &) - { - /* Load top and bottom data */ - const uint8x16_t t_data = vld1q_u8(in.ptr() - stride); - const uint8x16_t b_data = vld1q_u8(in.ptr() + stride); - - /* Cast to int16 and perform the subtraction between the bottom and top data */ - const int16x8_t out0 = vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(b_data))), - vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(t_data)))); - - /* Cast to int16 and perform the subtraction between the bottom and top data */ - const int16x8_t out1 = vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(b_data))), - vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(t_data)))); - - /* Store result of derivative along the Y direction */ - vst1q_s16(reinterpret_cast(out_y.ptr()), out0); - vst1q_s16(reinterpret_cast(out_y.ptr()) + 8, out1); - }, - in, out_y); -} - -void NEDerivativeKernel::derivative_xy(const Window &window) -{ - Iterator in(_input, window); - Iterator out_x(_output_x, window); - Iterator out_y(_output_y, window); - - const size_t stride = _input->info()->strides_in_bytes()[1]; - - /* Apply 1-D centered point discrete derivative masks ([-1 0 1] and [-1 0 1]^T) along the X and Y directions */ - execute_window_loop(window, [&](const Coordinates &) - { - /* Load top, bottom, left and right data */ - const uint8x16_t t_data = vld1q_u8(in.ptr() - stride); - const uint8x16_t b_data = vld1q_u8(in.ptr() + stride); - const uint8x16_t l_data = vld1q_u8(in.ptr() - 1); - const uint8x16_t r_data = vld1q_u8(in.ptr() + 1); - - /* Cast to int16 and perform the subtraction between the bottom and top data */ - const int16x8_t out0 = vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(b_data))), - vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(t_data)))); - - /* Cast to int16 and perform the subtraction between the bottom and top data */ - const int16x8_t out1 = vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(b_data))), - vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(t_data)))); - - /* Cast to int16 and perform the subtraction between the right and left data */ - const int16x8_t out2 = vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(r_data))), - vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(l_data)))); - - /* Cast to int16 and perform the subtraction between the right and left data */ - const int16x8_t out3 = vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(r_data))), - vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(l_data)))); - - /* Store result of derivative along the Y direction */ - vst1q_s16(reinterpret_cast(out_y.ptr()), out0); - vst1q_s16(reinterpret_cast(out_y.ptr()) + 8, out1); - - /* Store result of derivative along the X direction */ - vst1q_s16(reinterpret_cast(out_x.ptr()), out2); - vst1q_s16(reinterpret_cast(out_x.ptr()) + 8, out3); - }, - in, out_x, out_y); -} - -void NEDerivativeKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - ARM_COMPUTE_ERROR_ON(_func == nullptr); - - (this->*_func)(window); -} diff --git a/src/core/NEON/kernels/NEDerivativeKernel.h b/src/core/NEON/kernels/NEDerivativeKernel.h deleted file mode 100644 index 112b2b0b28..0000000000 --- a/src/core/NEON/kernels/NEDerivativeKernel.h +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEDERIVATIVEKERNEL_H -#define ARM_COMPUTE_NEDERIVATIVEKERNEL_H - -#include "src/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to run the derivative along the X/Y directions on a tensor. - * - */ -class NEDerivativeKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEDerivativeKernel"; - } - /** Default constructor */ - NEDerivativeKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDerivativeKernel(const NEDerivativeKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDerivativeKernel &operator=(const NEDerivativeKernel &) = delete; - /** Allow instances of this class to be moved */ - NEDerivativeKernel(NEDerivativeKernel &&) = default; - /** Allow instances of this class to be moved */ - NEDerivativeKernel &operator=(NEDerivativeKernel &&) = default; - /** Default destructor */ - ~NEDerivativeKernel() = default; - /** Initialise the kernel's sources, destination and border - * - * @note At least one of output_x or output_y must be set - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S16. - * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - /** Function to perform derivative along the X direction on the given window - * - * @param[in] window Region on which to execute the kernel - */ - void derivative_x(const Window &window); - /** Function to perform derivative along the Y direction on the given window - * - * @param[in] window Region on which to execute the kernel - */ - void derivative_y(const Window &window); - /** Function to perform derivative along the X and Y direction on the given window - * - * @param[in] window Region on which to execute the kernel - */ - void derivative_xy(const Window &window); - /** Common signature for all the specialised derivative functions - * - * @param[in] window Region on which to execute the kernel. - */ - using DerivativeFunction = void (NEDerivativeKernel::*)(const Window &window); - /** Derivative function to use for the particular tensor types passed to configure() */ - DerivativeFunction _func; - -private: - const ITensor *_input; /**< Input tensor */ - ITensor *_output_x; /**< Output tensor - Derivate along the X direction */ - ITensor *_output_y; /**< Output tensor - Derivate along the Y direction */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEDERIVATIVEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEDilateKernel.cpp b/src/core/NEON/kernels/NEDilateKernel.cpp deleted file mode 100644 index dc9ec22c71..0000000000 --- a/src/core/NEON/kernels/NEDilateKernel.cpp +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/NEON/kernels/NEDilateKernel.h" - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Validate.h" -#include "src/core/NEON/INEKernel.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" - -#include - -namespace arm_compute -{ -BorderSize NEDilateKernel::border_size() const -{ - return BorderSize(1); -} - -void NEDilateKernel::configure(const ITensor *input, ITensor *output, bool border_undefined) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - - _input = input; - _output = output; - - constexpr unsigned int num_elems_processed_per_iteration = 8; - constexpr unsigned int num_elems_read_per_iteration = 16; - constexpr unsigned int num_elems_written_per_iteration = 8; - constexpr unsigned int num_rows_read_per_iteration = 3; - - // Configure kernel window - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size()); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration); - AccessWindowRectangle input_access(input->info(), -border_size().left, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration); - - update_window_and_padding(win, input_access, output_access); - - output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - - INEKernel::configure(win); -} - -void NEDilateKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INESimpleKernel::window(), window); - - Iterator in(_input, window); - Iterator out(_output, window); - - const size_t in_stride = _input->info()->strides_in_bytes()[1]; - - execute_window_loop(window, [&](const Coordinates &) - { - uint8_t *in_ptr = in.ptr() - 1; - const uint8x16_t top_data = vld1q_u8(in_ptr - in_stride); - const uint8x16_t mid_data = vld1q_u8(in_ptr); - const uint8x16_t bot_data = vld1q_u8(in_ptr + in_stride); - - uint8x8_t top_high_data = vget_high_u8(top_data); - uint8x8_t top_low_data = vget_low_u8(top_data); - - uint8x8_t mid_high_data = vget_high_u8(mid_data); - uint8x8_t mid_low_data = vget_low_u8(mid_data); - - uint8x8_t bot_high_data = vget_high_u8(bot_data); - uint8x8_t bot_low_data = vget_low_u8(bot_data); - - uint8x8_t p0; - uint8x8_t p1; - - p0 = top_low_data; - p1 = vext_u8(top_low_data, top_high_data, 1); - p0 = vmax_u8(p0, p1); - - p1 = vext_u8(top_low_data, top_high_data, 2); - p0 = vmax_u8(p0, p1); - - p1 = mid_low_data; - p0 = vmax_u8(p0, p1); - - p1 = vext_u8(mid_low_data, mid_high_data, 1); - p0 = vmax_u8(p0, p1); - - p1 = vext_u8(mid_low_data, mid_high_data, 2); - p0 = vmax_u8(p0, p1); - - p1 = bot_low_data; - p0 = vmax_u8(p0, p1); - - p1 = vext_u8(bot_low_data, bot_high_data, 1); - p0 = vmax_u8(p0, p1); - - p1 = vext_u8(bot_low_data, bot_high_data, 2); - p0 = vmax_u8(p0, p1); - - vst1_u8(out.ptr(), p0); - }, - in, out); -} -} // namespace arm_compute diff --git a/src/core/NEON/kernels/NEDilateKernel.h b/src/core/NEON/kernels/NEDilateKernel.h deleted file mode 100644 index f1d34318ed..0000000000 --- a/src/core/NEON/kernels/NEDilateKernel.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEDILATEKERNEL_H -#define ARM_COMPUTE_NEDILATEKERNEL_H - -#include "src/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to perform boolean image dilatation */ -class NEDilateKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEDilateKernel"; - } - /** Default constructor */ - NEDilateKernel() = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDilateKernel(const NEDilateKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDilateKernel &operator=(const NEDilateKernel &) = delete; - /** Allow instances of this class to be moved */ - NEDilateKernel(NEDilateKernel &&) = default; - /** Allow instances of this class to be moved */ - NEDilateKernel &operator=(NEDilateKernel &&) = default; - /** Default destructor */ - ~NEDilateKernel() = default; - /** Set the source, destination and border mode of the kernel - * - * @param[in] input Source tensor. Data type supported: U8 - * @param[out] output Destination tensor. Data type supported: U8 - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input, ITensor *output, bool border_undefined); - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEDILATEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEErodeKernel.cpp b/src/core/NEON/kernels/NEErodeKernel.cpp deleted file mode 100644 index 171a6c828f..0000000000 --- a/src/core/NEON/kernels/NEErodeKernel.cpp +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/NEON/kernels/NEErodeKernel.h" - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Validate.h" -#include "src/core/NEON/INEKernel.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" - -#include - -namespace arm_compute -{ -BorderSize NEErodeKernel::border_size() const -{ - return BorderSize(1); -} - -void NEErodeKernel::configure(const ITensor *input, ITensor *output, bool border_undefined) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - - _input = input; - _output = output; - - constexpr unsigned int num_elems_processed_per_iteration = 8; - constexpr unsigned int num_elems_read_per_iteration = 16; - constexpr unsigned int num_elems_written_per_iteration = 8; - constexpr unsigned int num_rows_read_per_iteration = 3; - - // Configure kernel window - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size()); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration); - AccessWindowRectangle input_access(input->info(), -border_size().left, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration); - - update_window_and_padding(win, input_access, output_access); - - output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - - INEKernel::configure(win); -} - -void NEErodeKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INESimpleKernel::window(), window); - - Iterator in(_input, window); - Iterator out(_output, window); - - const size_t in_stride = _input->info()->strides_in_bytes()[1]; - - execute_window_loop(window, [&](const Coordinates &) - { - uint8_t *in_ptr = in.ptr() - 1; - const uint8x16_t top_data = vld1q_u8(in_ptr - in_stride); - const uint8x16_t mid_data = vld1q_u8(in_ptr); - const uint8x16_t bot_data = vld1q_u8(in_ptr + in_stride); - - uint8x8_t top_high_data = vget_high_u8(top_data); - uint8x8_t top_low_data = vget_low_u8(top_data); - - uint8x8_t mid_high_data = vget_high_u8(mid_data); - uint8x8_t mid_low_data = vget_low_u8(mid_data); - - uint8x8_t bot_high_data = vget_high_u8(bot_data); - uint8x8_t bot_low_data = vget_low_u8(bot_data); - - uint8x8_t p0; - uint8x8_t p1; - - p0 = top_low_data; - p1 = vext_u8(top_low_data, top_high_data, 1); - p0 = vmin_u8(p0, p1); - - p1 = vext_u8(top_low_data, top_high_data, 2); - p0 = vmin_u8(p0, p1); - - p1 = mid_low_data; - p0 = vmin_u8(p0, p1); - - p1 = vext_u8(mid_low_data, mid_high_data, 1); - p0 = vmin_u8(p0, p1); - - p1 = vext_u8(mid_low_data, mid_high_data, 2); - p0 = vmin_u8(p0, p1); - - p1 = bot_low_data; - p0 = vmin_u8(p0, p1); - - p1 = vext_u8(bot_low_data, bot_high_data, 1); - p0 = vmin_u8(p0, p1); - - p1 = vext_u8(bot_low_data, bot_high_data, 2); - p0 = vmin_u8(p0, p1); - - vst1_u8(out.ptr(), p0); - }, - in, out); -} -} // namespace arm_compute diff --git a/src/core/NEON/kernels/NEErodeKernel.h b/src/core/NEON/kernels/NEErodeKernel.h deleted file mode 100644 index 54f286780b..0000000000 --- a/src/core/NEON/kernels/NEErodeKernel.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEERODEKERNEL_H -#define ARM_COMPUTE_NEERODEKERNEL_H - -#include "src/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to perform boolean image erosion */ -class NEErodeKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEErodeKernel"; - } - /** Default constructor */ - NEErodeKernel() = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEErodeKernel(const NEErodeKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEErodeKernel &operator=(const NEErodeKernel &) = delete; - /** Allow instances of this class to be moved */ - NEErodeKernel(NEErodeKernel &&) = default; - /** Allow instances of this class to be moved */ - NEErodeKernel &operator=(NEErodeKernel &&) = default; - /** Default destructor */ - ~NEErodeKernel() = default; - /** Set the source, destination and border mode of the kernel - * - * @param[in] input Source tensor. Data type supported: U8 - * @param[out] output Destination tensor. Data type supported: U8 - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input, ITensor *output, bool border_undefined); - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEERODEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEFastCornersKernel.cpp b/src/core/NEON/kernels/NEFastCornersKernel.cpp deleted file mode 100644 index c9280d8dc0..0000000000 --- a/src/core/NEON/kernels/NEFastCornersKernel.cpp +++ /dev/null @@ -1,475 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/NEON/kernels/NEFastCornersKernel.h" - -#include "arm_compute/core/Coordinates.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Validate.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" - -#include -#include -#include -#include - -using namespace arm_compute; - -NEFastCornersKernel::NEFastCornersKernel() - : INEKernel(), _input(nullptr), _output(nullptr), _threshold(0), _non_max_suppression(false) -{ -} - -namespace -{ -constexpr size_t PERMUTATIONS = 16; -constexpr size_t PERM_SIZE = 16; - -inline uint8x8x2_t create_permutation_index(size_t k) -{ - ARM_COMPUTE_ERROR_ON(k >= PERMUTATIONS); - - static const std::array, PERM_SIZE> permutations_table{ { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 255, 255, 255, 255, 255, 255, 255 }, - { 15, 0, 1, 2, 3, 4, 5, 6, 7, 255, 255, 255, 255, 255, 255, 255 }, - { 14, 15, 0, 1, 2, 3, 4, 5, 6, 255, 255, 255, 255, 255, 255, 255 }, - { 13, 14, 15, 0, 1, 2, 3, 4, 5, 255, 255, 255, 255, 255, 255, 255 }, - { 12, 13, 14, 15, 0, 1, 2, 3, 4, 255, 255, 255, 255, 255, 255, 255 }, - { 11, 12, 13, 14, 15, 0, 1, 2, 3, 255, 255, 255, 255, 255, 255, 255 }, - { 10, 11, 12, 13, 14, 15, 0, 1, 2, 255, 255, 255, 255, 255, 255, 255 }, - { 9, 10, 11, 12, 13, 14, 15, 0, 1, 255, 255, 255, 255, 255, 255, 255 }, - { 8, 9, 10, 11, 12, 13, 14, 15, 0, 255, 255, 255, 255, 255, 255, 255 }, - { 7, 8, 9, 10, 11, 12, 13, 14, 15, 255, 255, 255, 255, 255, 255, 255 }, - { 6, 7, 8, 9, 10, 11, 12, 13, 14, 255, 255, 255, 255, 255, 255, 255 }, - { 5, 6, 7, 8, 9, 10, 11, 12, 13, 255, 255, 255, 255, 255, 255, 255 }, - { 4, 5, 6, 7, 8, 9, 10, 11, 12, 255, 255, 255, 255, 255, 255, 255 }, - { 3, 4, 5, 6, 7, 8, 9, 10, 11, 255, 255, 255, 255, 255, 255, 255 }, - { 2, 3, 4, 5, 6, 7, 8, 9, 10, 255, 255, 255, 255, 255, 255, 255 }, - { 1, 2, 3, 4, 5, 6, 7, 8, 9, 255, 255, 255, 255, 255, 255, 255 } - - } }; - - const uint8x8x2_t index = - { - { - vld1_u8(permutations_table[k].data()), - vld1_u8(permutations_table[k].data() + 8) - } - }; - - return index; -} - -inline uint8x8x4_t create_circle_index_register() -{ - /* - This function creates the index registers to retrieve the 16 texels in the Bresenham circle of radius 3 with center in P. - - . . F 0 1 . . . - . E . . . 2 . . - D . . . . . 3 . - C . . P . . 4 . - B . . . . . 5 . - . A . . . 6 . . - . . 9 8 7 . . . - - Where . is an irrelevant texel value - - We want to retrieve all texels [0,F] - - The 4 registers in r will then be used to get these texels out of two tables in the function get_circle_texels() - - The first table holds the top 4 rows of texels - . . F 0 1 . . . - . E . . . 2 . . - D . . . . . 3 . - C . . P . . 4 . - - The second table the bottom 3 rows of texels - B . . . . . 5 . - . A . . . 6 . . - . . 9 8 7 . . . - - */ - static const std::array top_right = - { - /* The register r.val[0] will be used to retrieve these texels: - . . . 0 1 . . . - . . . . . 2 . . - . . . . . . 3 . - . . . . . . 4 . - */ - 3 /* top table, first row, elem 4, value 0 in the diagram above */, - 4 /* top table, first row, elem 5, value 1 in the diagram above */, - 13 /* top table, second row, elem 6, value 2 in the diagram above */, - 22 /* top table, third row, elem 7, value 3 in the diagram above*/, - 30 /* top table, fourth row, elem 7, value 4 in the diagram above*/, - 255, - 255, - 255 - }; - - static const std::array bottom_right = - { - /* The register r.val[1] will be used to retrieve these texels: - . . . . . . 5 . - . . . . . 6 . . - . . . . 7 . . . - */ - 255, - 255, - 255, - 255, - 255, - 6 /* low table, first row, elem 7, value 5 in the diagram above*/, - 13 /* low table, second row, elem 6, value 6 in the diagram above*/, - 20 /* low table, third row, elem 5, value 7 in the diagram above*/ - }; - - static const std::array top_left = - { - /* The register r.val[2] will be used to retrieve these texels: - . . F . . . . . - . E . . . . . . - D . . . . . . . - C . . . . . . . - */ - 255, - 255, - 255, - 255, - 24 /* top table, fourth row, elem 1, value C in the diagram above */, - 16 /* top table, third row, elem 1, value D in the diagram above*/, - 9 /* top table, second row, elem 2, value E in the diagram above*/, - 2 /* top table, first row, elem 3, value F in the diagram above*/ - }; - - static const std::array bottom_left = - { - /* The register r.val[3] will be used to retrieve these texels: - B . . . . . . . - . A . . . . . . - . . 9 8 . . . . - */ - 19 /* low table, third row, elem 4, value 8 in the diagram above */, - 18 /* low table, third row, elem 3, value 9 in the diagram above */, - 9 /* low table, second row, elem 2, value A in the diagram above */, - 0 /* low table, first row, elem 1, value B in the diagram above */, - 255, - 255, - 255, - 255 - }; - - const uint8x8x4_t reg = - { - { - vld1_u8(top_right.data()), - vld1_u8(bottom_right.data()), - vld1_u8(top_left.data()), - vld1_u8(bottom_left.data()) - } - }; - - return reg; -} - -inline uint8x16_t get_circle_texels(const uint8x8x4_t &index, const uint8x8x4_t &tbl_hi, const uint8x8x3_t &tbl_lo) -{ - /* - This function loads the 16 texels in the Bresenham circle of radius 3 into the register 'texels'. - The parameter 'index' is an array of indices which was previously setup in setup_circle_index_register(). - tbl_hi and tbl_lo are the two tables holding the texels in the window [(-3,-3),(+3,+3)] for a given texel P - */ - return vcombine_u8(vtbx3_u8(vtbl4_u8(tbl_hi, index.val[0]), tbl_lo, index.val[1]), - vtbx3_u8(vtbl4_u8(tbl_hi, index.val[2]), tbl_lo, index.val[3])); -} - -inline uint8x16_t get_permutation_texels(const uint8x8x2_t &permutation_index, const uint8x8x2_t &tbl_circle) -{ - /* - This function stores the 9 texels of a give permutation X in the neon register 'texels' - - 'tbl_circle' is a LUT with the texels 0 to F - - . . F 0 1 . . . - . E . . . 2 . . - D . . . . . 3 . - C . . P . . 4 . - B . . . . . 5 . - . A . . . 6 . . - . . 9 8 7 . . . - - 'permutation_index' is one of the permutations below: - - { 0, 1, 2, 3, 4, 5, 6, 7, 8}, - { F, 0, 1, 2, 3, 4, 5, 6, 7}, - { E, F, 0, 1, 2, 3, 4, 5, 6}, - { D, E, F, 0, 1, 2, 3, 4, 5}, - { C, D, E, F, 0, 1, 2, 3, 4}, - { B, C, D, E, F, 0, 1, 2, 3}, - { A, B, C, D, E, F, 0, 1, 2}, - { 9, A, B, C, D, E, F, 0, 1}, - { 8, 9, A, B, C, D, E, F, 0}, - { 7, 8, 9, A, B, C, D, E, F}, - { 6, 7, 8, 9, A, B, C, D, E}, - { 5, 6, 7, 8, 9, A, B, C, D}, - { 4, 5, 6, 7, 8, 9, A, B, C}, - { 3, 4, 5, 6, 7, 8, 9, A, B}, - { 2, 3, 4, 5, 6, 7, 8, 9, A}, - { 1, 2, 3, 4, 5, 6, 7, 8, 9}, - */ - static const uint8x8_t perm_right = vdup_n_u8(255); // init to 255 so that vtbx preserves the original values of the lanes - - return vcombine_u8(vtbl2_u8(tbl_circle, permutation_index.val[0]), - vtbx2_u8(perm_right, tbl_circle, permutation_index.val[1])); -} - -inline bool is_permutation_brighter(const uint8x16_t &permutation, const uint8x16_t &pg) -{ - const uint8x16_t res_gt = vcgtq_u8(permutation, pg); - - return vget_lane_u64(vreinterpret_u64_u8(vand_u8(vget_high_u8(res_gt), vget_low_u8(res_gt))), 0) == std::numeric_limits::max(); -} - -inline bool is_permutation_darker(const uint8x16_t &permutation, const uint8x16_t &pl) -{ - const uint8x16_t res_lt = vcltq_u8(permutation, pl); - const uint64x2_t u64res_lt = vreinterpretq_u64_u8(res_lt); - const uint64_t t3 = vgetq_lane_u64(u64res_lt, 0); - const uint64_t t4 = vgetq_lane_u64(u64res_lt, 1); - - return std::numeric_limits::max() == t3 && 255 == t4; -} - -inline bool is_permutation_corner(const uint8x16_t &permutation, const uint8x16_t &pg, const uint8x16_t &pl) -{ - return is_permutation_brighter(permutation, pg) || is_permutation_darker(permutation, pl); -} - -inline bool point_is_fast_corner(uint8_t p, uint8_t threshold, const uint8x8x2_t &tbl_circle_texels, std::array &perm_indices) -{ - /* - This function determines whether the point 'p' is a corner. - */ - uint8x16_t pg = vqaddq_u8(vdupq_n_u8(p), vdupq_n_u8(threshold)); - uint8x16_t pl = vqsubq_u8(vdupq_n_u8(p), vdupq_n_u8(threshold)); - - bool corner_detected = false; - - for(size_t j = 0; !corner_detected && j < PERMUTATIONS; ++j) - { - const uint8x16_t pe_texels = get_permutation_texels(perm_indices[j], tbl_circle_texels); - corner_detected = is_permutation_corner(pe_texels, pg, pl); - } - - return corner_detected; -} - -inline uint8x8x2_t create_circle_tbl(const std::array &buffer, size_t in_offset, const uint8x8x4_t &circle_index_r) -{ - /* - This function builds a LUT holding the 16 texels in the Brensenham circle radius 3. - circle_index_r is a vector of 4 registers to retrieve the texels from the two tables mentioned above. - */ - - //Load the texels in the window [(x-3,y-3),(x+3,y+3)]. - //The top 4 rows are loaded in tbl_hi and the low 3 rows in tbl_lo. - //These two tables are then used to retrieve the texels in the Bresenham circle of radius 3. - const uint8x8x4_t tbl_window_hi = - { - { - vld1_u8(buffer[0] + in_offset), - vld1_u8(buffer[1] + in_offset), - vld1_u8(buffer[2] + in_offset), - vld1_u8(buffer[3] + in_offset) - } - }; - - const uint8x8x3_t tbl_window_lo = - { - { - vld1_u8(buffer[4] + in_offset), - vld1_u8(buffer[5] + in_offset), - vld1_u8(buffer[6] + in_offset) - } - }; - - const uint8x16_t circle_texels = get_circle_texels(circle_index_r, tbl_window_hi, tbl_window_lo); - - const uint8x8x2_t tbl_circle_texels = - { - { - vget_low_u8(circle_texels), - vget_high_u8(circle_texels) - } - }; - - return tbl_circle_texels; -} - -inline uint8_t get_point_score(uint8_t p, uint8_t tolerance, const uint8x8x2_t &tbl_circle, std::array &perm_indices) -{ - uint8_t b = 255; - uint8_t a = tolerance; - - while(b - a > 1) - { - const uint16_t ab = a + b; - const uint8_t c = ab >> 1; - - if(point_is_fast_corner(p, c, tbl_circle, perm_indices)) - { - a = c; - } - else - { - b = c; - } - } - - return a; -} -} // namespace - -BorderSize NEFastCornersKernel::border_size() const -{ - return BorderSize(3); -} - -void NEFastCornersKernel::configure(const IImage *input, IImage *output, uint8_t threshold, bool non_max_suppression, bool border_undefined) -{ - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input); - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(output); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_MSG(border_undefined == false, "Not implemented"); - - _input = input; - _output = output; - _threshold = threshold; - _non_max_suppression = non_max_suppression; - - constexpr unsigned int num_elems_processed_per_iteration = 1; - constexpr unsigned int num_elems_read_per_iteration = 8; - constexpr unsigned int num_elems_written_per_iteration = 1; - constexpr unsigned int num_rows_read_per_iteration = 7; - - // Configure kernel window - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size()); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration); - AccessWindowRectangle input_access(input->info(), -border_size().left, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration); - - update_window_and_padding(win, input_access, output_access); - - output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - - INEKernel::configure(win); -} - -void NEFastCornersKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - - std::array perm_index{ {} }; - /* - We use a LUT loaded with 7 rows of uint8_t from the input image [-3,-3]...[+3,+3] to retrieve the texels in the Brensenham circle radius 3 and put them in one neon register uint8x16_t. - The three lines below setup the neon index registers to get these texels out from the table - */ - const uint8x8x4_t circle_index_r = create_circle_index_register(); - /* - We put the 16 texels (circle) in a LUT to easily generate all the permutations. The for block below setups the indices for each permutation. - */ - for(size_t k = 0; k < PERMUTATIONS; ++k) - { - perm_index[k] = create_permutation_index(k); - } - - Iterator in(_input, window); - Iterator out(_output, window); - - const std::array in_row - { - _input->ptr_to_element(Coordinates(-3, -3)), - _input->ptr_to_element(Coordinates(-3, -2)), - _input->ptr_to_element(Coordinates(-3, -1)), - _input->ptr_to_element(Coordinates(-3, 0)), - _input->ptr_to_element(Coordinates(-3, 1)), - _input->ptr_to_element(Coordinates(-3, 2)), - _input->ptr_to_element(Coordinates(-3, 3)) - }; - - auto is_rejected = [](uint8_t p, uint8_t q, uint8_t a, uint8_t b) - { - const bool p_is_in_ab = (a <= p) && (p <= b); - const bool q_is_in_ab = (a <= q) && (q <= b); - return p_is_in_ab && q_is_in_ab; - }; - - execute_window_loop(window, [&](const Coordinates &) - { - const size_t in_offset = in.offset(); - const uint8_t p0 = *in.ptr(); - const uint8_t b = std::min(p0 + _threshold, 255); - const uint8_t a = std::max(p0 - _threshold, 0); - uint8_t score = 0; - /* - Fast check to discard points which cannot be corners and avoid the expensive computation of the potential 16 permutations - - pixels 1 and 9 are examined, if both I1 and I9 are within [Ip - t, Ip + t], then candidate p is not a corner. - */ - const uint8_t p1 = (in_offset + in_row[0])[3]; - const uint8_t p9 = (in_offset + in_row[6])[3]; - - if(!is_rejected(p1, p9, a, b)) - { - /* pixels 5 and 13 are further examined to check whether three of them are brighter than Ip + t or darker than Ip - t */ - const uint8_t p5 = (in_offset + in_row[3])[6]; - const uint8_t p13 = (in_offset + in_row[3])[0]; - - if(!is_rejected(p5, p13, a, b)) - { - /* at this stage we use the full test with the 16 permutations to classify the point as corner or not */ - const uint8x8x2_t tbl_circle_texel = create_circle_tbl(in_row, in_offset, circle_index_r); - - if(point_is_fast_corner(p0, _threshold, tbl_circle_texel, perm_index)) - { - if(_non_max_suppression) - { - score = get_point_score(p0, _threshold, tbl_circle_texel, perm_index); - } - else - { - score = 1; - } - } - } - } - - *out.ptr() = score; - }, - in, out); -} diff --git a/src/core/NEON/kernels/NEFastCornersKernel.h b/src/core/NEON/kernels/NEFastCornersKernel.h deleted file mode 100644 index f981d72a03..0000000000 --- a/src/core/NEON/kernels/NEFastCornersKernel.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (c) 2016-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEFASTCORNERSKERNEL_H -#define ARM_COMPUTE_NEFASTCORNERSKERNEL_H - -#include "arm_compute/core/Types.h" -#include "src/core/NEON/INEKernel.h" - -#include - -namespace arm_compute -{ -class ITensor; -using IImage = ITensor; - -/** Neon kernel to perform fast corners */ -class NEFastCornersKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEFastCornersKernel"; - } - /** Constructor */ - NEFastCornersKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFastCornersKernel(const NEFastCornersKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFastCornersKernel &operator=(const NEFastCornersKernel &) = delete; - /** Allow instances of this class to be moved */ - NEFastCornersKernel(NEFastCornersKernel &&) = default; - /** Allow instances of this class to be moved */ - NEFastCornersKernel &operator=(NEFastCornersKernel &&) = default; - /** Default destructor */ - ~NEFastCornersKernel() = default; - /** Initialise the kernel. - * - * @param[in] input Source image. Data type supported: U8. - * @param[out] output Output image. Data type supported: U8. - * @param[in] threshold Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3. - * @param[in] non_max_suppression True if non-maxima suppresion is applied, false otherwise. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const IImage *input, IImage *output, uint8_t threshold, bool non_max_suppression, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - const IImage *_input; /**< source image */ - IImage *_output; /**< inermediate results */ - uint8_t _threshold; /**< threshold on difference between intensity */ - bool _non_max_suppression; /** true if non-maxima suppression is applied in the next stage */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEFASTCORNERSKERNEL_H */ diff --git a/src/core/NEON/kernels/NEGaussian3x3Kernel.cpp b/src/core/NEON/kernels/NEGaussian3x3Kernel.cpp deleted file mode 100644 index 63b26ab7c0..0000000000 --- a/src/core/NEON/kernels/NEGaussian3x3Kernel.cpp +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/NEON/kernels/NEGaussian3x3Kernel.h" - -#include "arm_compute/core/Coordinates.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/Validate.h" -#include "src/core/NEON/INEKernel.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" - -#include - -using namespace arm_compute; - -BorderSize NEGaussian3x3Kernel::border_size() const -{ - return BorderSize(1); -} - -void NEGaussian3x3Kernel::configure(const ITensor *input, ITensor *output, bool border_undefined) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); - - _input = input; - _output = output; - - // Configure kernel window - constexpr unsigned int num_elems_processed_per_iteration = 8; - constexpr unsigned int num_elems_read_per_iteration = 16; - constexpr unsigned int num_elems_written_per_iteration = 8; - constexpr unsigned int num_rows_read_per_iteration = 3; - - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size()); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration); - - update_window_and_padding(win, - AccessWindowRectangle(input->info(), -border_size().left, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration), - output_access); - - output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - - INEKernel::configure(win); -} - -void NEGaussian3x3Kernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INESimpleKernel::window(), window); - - Iterator input(_input, window); - Iterator output(_output, window); - - const uint8_t *input_bot_ptr = _input->ptr_to_element(Coordinates(-1, -1)); - const uint8_t *input_mid_ptr = _input->ptr_to_element(Coordinates(-1, 0)); - const uint8_t *input_top_ptr = _input->ptr_to_element(Coordinates(-1, +1)); - - static const int16x8_t two = vdupq_n_s16(2); - static const int16x8_t four = vdupq_n_s16(4); - - execute_window_loop(window, [&](const Coordinates &) - { - uint8x16_t top_data = vld1q_u8(input_top_ptr + input.offset()); - uint8x16_t mid_data = vld1q_u8(input_mid_ptr + input.offset()); - uint8x16_t bot_data = vld1q_u8(input_bot_ptr + input.offset()); - - const int16x8x2_t top_s16 = - { - { - vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(top_data))), - vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(top_data))) - } - }; - const int16x8x2_t mid_s16 = - { - { - vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(mid_data))), - vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(mid_data))) - } - }; - const int16x8x2_t bot_s16 = - { - { - vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(bot_data))), - vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(bot_data))) - } - }; - - //top left - int16x8_t out = top_s16.val[0]; - //top mid - out = vmlaq_s16(out, vextq_s16(top_s16.val[0], top_s16.val[1], 1), two); - //top right - out = vaddq_s16(out, vextq_s16(top_s16.val[0], top_s16.val[1], 2)); - //mid left - out = vmlaq_s16(out, mid_s16.val[0], two); - //mid mid - out = vmlaq_s16(out, vextq_s16(mid_s16.val[0], mid_s16.val[1], 1), four); - //mid right - out = vmlaq_s16(out, vextq_s16(mid_s16.val[0], mid_s16.val[1], 2), two); - //bot left - out = vaddq_s16(out, bot_s16.val[0]); - //bot mid - out = vmlaq_s16(out, vextq_s16(bot_s16.val[0], bot_s16.val[1], 1), two); - //bot right - out = vaddq_s16(out, vextq_s16(bot_s16.val[0], bot_s16.val[1], 2)); - - vst1_u8(output.ptr(), vqshrun_n_s16(out, 4)); - }, - input, output); -} diff --git a/src/core/NEON/kernels/NEGaussian3x3Kernel.h b/src/core/NEON/kernels/NEGaussian3x3Kernel.h deleted file mode 100644 index 7ceea2e7c1..0000000000 --- a/src/core/NEON/kernels/NEGaussian3x3Kernel.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright (c) 2016-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEGAUSSIAN3x3KERNEL_H -#define ARM_COMPUTE_NEGAUSSIAN3x3KERNEL_H - -#include "src/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Neon kernel to perform a Gaussian 3x3 filter */ -class NEGaussian3x3Kernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEGaussian3x3Kernel"; - } - /** Constructor */ - NEGaussian3x3Kernel() = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGaussian3x3Kernel(const NEGaussian3x3Kernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGaussian3x3Kernel &operator=(const NEGaussian3x3Kernel &) = delete; - /** Allow instances of this class to be moved */ - NEGaussian3x3Kernel(NEGaussian3x3Kernel &&) = default; - /** Allow instances of this class to be moved */ - NEGaussian3x3Kernel &operator=(NEGaussian3x3Kernel &&) = default; - /** Default destructor */ - ~NEGaussian3x3Kernel() = default; - /** Set the source, destination and border mode of the kernel - * - * @param[in] input Source tensor. Data type supported: U8 - * @param[out] output Destination tensor. Data type supported: same as @p input - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input, ITensor *output, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEGAUSSIAN3x3KERNEL_H */ diff --git a/src/core/NEON/kernels/NEGaussian5x5Kernel.cpp b/src/core/NEON/kernels/NEGaussian5x5Kernel.cpp deleted file mode 100644 index ab2feb0dc2..0000000000 --- a/src/core/NEON/kernels/NEGaussian5x5Kernel.cpp +++ /dev/null @@ -1,211 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/NEON/kernels/NEGaussian5x5Kernel.h" - -#include "arm_compute/core/Coordinates.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" -#include "src/core/NEON/INEKernel.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" - -#include -#include -#include - -using namespace arm_compute; - -NEGaussian5x5HorKernel::NEGaussian5x5HorKernel() - : _border_size(0) -{ -} - -BorderSize NEGaussian5x5HorKernel::border_size() const -{ - return _border_size; -} - -void NEGaussian5x5HorKernel::configure(const ITensor *input, ITensor *output, bool border_undefined) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::S16); - - _input = input; - _output = output; - _border_size = BorderSize(border_undefined ? 0 : 2, 2); - - // Configure kernel window - constexpr unsigned int num_elems_processed_per_iteration = 8; - constexpr unsigned int num_elems_read_per_iteration = 16; - constexpr unsigned int num_elems_written_per_iteration = 8; - - Window win = calculate_max_window_horizontal(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size()); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration); - - update_window_and_padding(win, - AccessWindowHorizontal(input->info(), -border_size().left, num_elems_read_per_iteration), - output_access); - - output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - - INEKernel::configure(win); -} - -void NEGaussian5x5HorKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - - Window win_in(window); - win_in.shift(Window::DimX, -2); - - Iterator input(_input, win_in); - Iterator output(_output, window); - - static const int16x8_t six = vdupq_n_s16(6); - static const int16x8_t four = vdupq_n_s16(4); - - execute_window_loop(window, [&](const Coordinates &) - { - uint8x16_t data = vld1q_u8(input.ptr()); - - const int16x8x2_t data_s16 = - { - { - vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(data))), - vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(data))) - } - }; - - int16x8_t out = vaddq_s16(data_s16.val[0], vextq_s16(data_s16.val[0], data_s16.val[1], 4)); - out = vmlaq_s16(out, vextq_s16(data_s16.val[0], data_s16.val[1], 1), four); - out = vmlaq_s16(out, vextq_s16(data_s16.val[0], data_s16.val[1], 2), six); - out = vmlaq_s16(out, vextq_s16(data_s16.val[0], data_s16.val[1], 3), four); - - vst1q_s16(reinterpret_cast(output.ptr()), out); - }, - input, output); -} - -NEGaussian5x5VertKernel::NEGaussian5x5VertKernel() -{ -} - -BorderSize NEGaussian5x5VertKernel::border_size() const -{ - return BorderSize{ 2, 0 }; -} - -void NEGaussian5x5VertKernel::configure(const ITensor *input, ITensor *output, bool border_undefined) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S16); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); - - _input = input; - _output = output; - - // Configure kernel window - constexpr unsigned int num_elems_processed_per_iteration = 16; - constexpr unsigned int num_elems_read_per_iteration = 32; - constexpr unsigned int num_elems_written_per_iteration = 16; - constexpr unsigned int num_rows_read_per_iteration = 5; - - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size()); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration); - - update_window_and_padding(win, - AccessWindowRectangle(input->info(), 0, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration), - output_access); - - output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - - INEKernel::configure(win); -} - -void NEGaussian5x5VertKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INESimpleKernel::window(), window); - - Iterator input(_input, window); - Iterator output(_output, window); - - const uint8_t *input_top2_ptr = _input->ptr_to_element(Coordinates(0, -2)); - const uint8_t *input_top_ptr = _input->ptr_to_element(Coordinates(0, -1)); - const uint8_t *input_mid_ptr = _input->ptr_to_element(Coordinates(0, 0)); - const uint8_t *input_low_ptr = _input->ptr_to_element(Coordinates(0, 1)); - const uint8_t *input_low2_ptr = _input->ptr_to_element(Coordinates(0, 2)); - - const uint16x8_t six = vdupq_n_u16(6); - const uint16x8_t four = vdupq_n_u16(4); - - execute_window_loop(window, [&](const Coordinates &) - { - const size_t input_offset_high_s16 = input.offset(); - const size_t input_offset_low_s16 = input.offset() + 16; - - //HIGH DATA - //top2 - uint16x8_t data_high = vreinterpretq_u16_s16(vld1q_s16(reinterpret_cast(input_top2_ptr + input_offset_high_s16))); - uint16x8_t out_high = data_high; - //top - data_high = vreinterpretq_u16_s16(vld1q_s16(reinterpret_cast(input_top_ptr + input_offset_high_s16))); - out_high = vmlaq_u16(out_high, data_high, four); - //mid - data_high = vreinterpretq_u16_s16(vld1q_s16(reinterpret_cast(input_mid_ptr + input_offset_high_s16))); - out_high = vmlaq_u16(out_high, data_high, six); - //low - data_high = vreinterpretq_u16_s16(vld1q_s16(reinterpret_cast(input_low_ptr + input_offset_high_s16))); - out_high = vmlaq_u16(out_high, data_high, four); - //low2 - data_high = vreinterpretq_u16_s16(vld1q_s16(reinterpret_cast(input_low2_ptr + input_offset_high_s16))); - out_high = vaddq_u16(out_high, data_high); - - //LOW DATA - //top2 - uint16x8_t data_low = vreinterpretq_u16_s16(vld1q_s16(reinterpret_cast(input_top2_ptr + input_offset_low_s16))); - uint16x8_t out_low = data_low; - //top - data_low = vreinterpretq_u16_s16(vld1q_s16(reinterpret_cast(input_top_ptr + input_offset_low_s16))); - out_low = vmlaq_u16(out_low, data_low, four); - //mid - data_low = vreinterpretq_u16_s16(vld1q_s16(reinterpret_cast(input_mid_ptr + input_offset_low_s16))); - out_low = vmlaq_u16(out_low, data_low, six); - //low - data_low = vreinterpretq_u16_s16(vld1q_s16(reinterpret_cast(input_low_ptr + input_offset_low_s16))); - out_low = vmlaq_u16(out_low, data_low, four); - //low2 - data_low = vreinterpretq_u16_s16(vld1q_s16(reinterpret_cast(input_low2_ptr + input_offset_low_s16))); - out_low = vaddq_u16(out_low, data_low); - - vst1q_u8(output.ptr(), vcombine_u8(vqshrn_n_u16(out_high, 8), - vqshrn_n_u16(out_low, 8))); - }, - input, output); -} diff --git a/src/core/NEON/kernels/NEGaussian5x5Kernel.h b/src/core/NEON/kernels/NEGaussian5x5Kernel.h deleted file mode 100644 index 2c7262f827..0000000000 --- a/src/core/NEON/kernels/NEGaussian5x5Kernel.h +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2016-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEGAUSSIAN5x5KERNEL_H -#define ARM_COMPUTE_NEGAUSSIAN5x5KERNEL_H - -#include "src/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Neon kernel to perform a Gaussian 5x5 filter (horizontal pass) */ -class NEGaussian5x5HorKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEGaussian5x5HorKernel"; - } - /** Default constructor */ - NEGaussian5x5HorKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGaussian5x5HorKernel(NEGaussian5x5HorKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGaussian5x5HorKernel &operator=(NEGaussian5x5HorKernel &) = delete; - /** Allow instances of this class to be moved */ - NEGaussian5x5HorKernel(NEGaussian5x5HorKernel &&) = default; - /** Allow instances of this class to be moved */ - NEGaussian5x5HorKernel &operator=(NEGaussian5x5HorKernel &&) = default; - /** Default destructor */ - ~NEGaussian5x5HorKernel() = default; - - /** Initialise the kernel's source, destination and border mode. - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] output Destination tensor. Data type supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input, ITensor *output, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - BorderSize _border_size; -}; - -/** Neon kernel to perform a Gaussian 5x5 filter (vertical pass) */ -class NEGaussian5x5VertKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEGaussian5x5VertKernel"; - } - /** Default constructor */ - NEGaussian5x5VertKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGaussian5x5VertKernel(NEGaussian5x5VertKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGaussian5x5VertKernel &operator=(NEGaussian5x5VertKernel &) = delete; - /** Allow instances of this class to be moved */ - NEGaussian5x5VertKernel(NEGaussian5x5VertKernel &&) = default; - /** Allow instances of this class to be moved */ - NEGaussian5x5VertKernel &operator=(NEGaussian5x5VertKernel &&) = default; - /** Default destructor */ - ~NEGaussian5x5VertKernel() = default; - /** Initialise the kernel's source, destination and border mode. - * - * @param[in] input Source tensor. Data type supported: S16. - * @param[out] output Destination tensor, Data type supported: U8. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input, ITensor *output, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEGAUSSIAN5x5KERNEL_H */ diff --git a/src/core/NEON/kernels/NEGaussianPyramidKernel.cpp b/src/core/NEON/kernels/NEGaussianPyramidKernel.cpp deleted file mode 100644 index 49c8e9ec3e..0000000000 --- a/src/core/NEON/kernels/NEGaussianPyramidKernel.cpp +++ /dev/null @@ -1,272 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/NEON/kernels/NEGaussianPyramidKernel.h" - -#include "arm_compute/core/Coordinates.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" -#include "src/core/NEON/INEKernel.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" - -#include -#include -#include -#include - -using namespace arm_compute; - -NEGaussianPyramidHorKernel::NEGaussianPyramidHorKernel() - : _l2_load_offset(0) -{ -} - -BorderSize NEGaussianPyramidHorKernel::border_size() const -{ - return BorderSize{ 0, 2 }; -} - -void NEGaussianPyramidHorKernel::configure(const ITensor *input, ITensor *output) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::S16); - ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) != output->info()->dimension(1)); - - for(size_t i = 2; i < Coordinates::num_max_dimensions; ++i) - { - ARM_COMPUTE_ERROR_ON(input->info()->dimension(i) != output->info()->dimension(i)); - } - - _input = input; - _output = output; - - // Configure kernel window - constexpr unsigned int num_elems_processed_per_iteration = 16; - constexpr unsigned int num_elems_read_per_iteration = 32; - constexpr unsigned int num_elems_written_per_iteration = 8; - const float scale_x = static_cast(output->info()->dimension(0)) / input->info()->dimension(0); - - Window win = calculate_max_window_horizontal(*input->info(), Steps(num_elems_processed_per_iteration)); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration, scale_x); - - // Sub sampling selects odd pixels (1, 3, 5, ...) for images with even - // width and even pixels (0, 2, 4, ...) for images with odd width. (Whether - // a pixel is even or odd is determined based on the tensor shape not the - // valid region!) - // Thus the offset from which the first pixel (L2) for the convolution is - // loaded depends on the anchor and shape of the valid region. - // In the case of an even shape (= even image width) we need to load L2 - // from -2 if the anchor is odd and from -1 if the anchor is even. That - // makes sure that L2 is always loaded from an odd pixel. - // On the other hand, for an odd shape (= odd image width) we need to load - // L2 from -1 if the anchor is odd and from -2 if the anchor is even to - // achieve the opposite effect. - // The condition can be simplified to checking whether anchor + shape is - // odd (-2) or even (-1) as only adding an odd and an even number will have - // an odd result. - _l2_load_offset = -border_size().left; - - if((_input->info()->valid_region().anchor[0] + _input->info()->valid_region().shape[0]) % 2 == 0) - { - _l2_load_offset += 1; - } - - // Replace input access with static window - update_window_and_padding(win, - AccessWindowHorizontal(input->info(), _l2_load_offset, num_elems_read_per_iteration), - output_access); - - output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape())); - - INEKernel::configure(win); -} - -void NEGaussianPyramidHorKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - ARM_COMPUTE_ERROR_ON(window.x().step() % 2); - - static const int16x8_t six = vdupq_n_s16(6); - static const int16x8_t four = vdupq_n_s16(4); - - Window win_in(window); - win_in.shift(Window::DimX, _l2_load_offset); - - Iterator in(_input, win_in); - - // The output is half the width of the input - Window win_out(window); - win_out.scale(Window::DimX, 0.5f); - - Iterator out(_output, win_out); - - execute_window_loop(window, [&](const Coordinates &) - { - const uint8x16x2_t data_2q = vld2q_u8(in.ptr()); - const uint8x16_t &data_even = data_2q.val[0]; - const uint8x16_t &data_odd = data_2q.val[1]; - - const int16x8_t data_l2 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(data_even))); - const int16x8_t data_l1 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(data_odd))); - const int16x8_t data_m = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(vextq_u8(data_even, data_even, 1)))); - const int16x8_t data_r1 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(vextq_u8(data_odd, data_odd, 1)))); - const int16x8_t data_r2 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(vextq_u8(data_even, data_even, 2)))); - - int16x8_t out_val = vaddq_s16(data_l2, data_r2); - out_val = vmlaq_s16(out_val, data_l1, four); - out_val = vmlaq_s16(out_val, data_m, six); - out_val = vmlaq_s16(out_val, data_r1, four); - - vst1q_s16(reinterpret_cast(out.ptr()), out_val); - }, - in, out); -} - -NEGaussianPyramidVertKernel::NEGaussianPyramidVertKernel() - : _t2_load_offset(0) -{ -} - -BorderSize NEGaussianPyramidVertKernel::border_size() const -{ - return BorderSize{ 2, 0 }; -} - -void NEGaussianPyramidVertKernel::configure(const ITensor *input, ITensor *output) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S16); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != output->info()->dimension(0)); - - for(size_t i = 2; i < Coordinates::num_max_dimensions; ++i) - { - ARM_COMPUTE_ERROR_ON(input->info()->dimension(i) != output->info()->dimension(i)); - } - - _input = input; - _output = output; - - // Configure kernel window - constexpr unsigned int num_elems_processed_per_iteration = 16; - constexpr unsigned int num_rows_processed_per_iteration = 2; - - constexpr unsigned int num_elems_written_per_iteration = 16; - constexpr unsigned int num_rows_written_per_iteration = 1; - - constexpr unsigned int num_elems_read_per_iteration = 16; - constexpr unsigned int num_rows_read_per_iteration = 5; - - const float scale_y = static_cast(output->info()->dimension(1)) / input->info()->dimension(1); - - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration, num_rows_processed_per_iteration)); - AccessWindowRectangle output_access(output->info(), 0, 0, num_elems_written_per_iteration, num_rows_written_per_iteration, 1.f, scale_y); - - // Determine whether we need to load even or odd rows. See above for a - // detailed explanation. - _t2_load_offset = -border_size().top; - - if((_input->info()->valid_region().anchor[1] + _input->info()->valid_region().shape[1]) % 2 == 0) - { - _t2_load_offset += 1; - } - - update_window_and_padding(win, - AccessWindowRectangle(input->info(), 0, _t2_load_offset, num_elems_read_per_iteration, num_rows_read_per_iteration), - output_access); - - output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape())); - - INEKernel::configure(win); -} - -void NEGaussianPyramidVertKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - ARM_COMPUTE_ERROR_ON(window.x().step() != 16); - ARM_COMPUTE_ERROR_ON(window.y().step() % 2); - ARM_COMPUTE_ERROR_ON(_input->buffer() == nullptr); - - static const uint16x8_t six = vdupq_n_u16(6); - static const uint16x8_t four = vdupq_n_u16(4); - - Window win_in(window); - // Need to load two times 8 values instead of 16 values once - win_in.set_dimension_step(Window::DimX, 8); - win_in.shift(Window::DimY, _t2_load_offset); - - Iterator in(_input, win_in); - - // Output's height is half of input's - Window win_out(window); - win_out.scale(Window::DimY, 0.5f); - - Iterator out(_output, win_out); - - const uint8_t *input_top2_ptr = _input->buffer() + _input->info()->offset_element_in_bytes(Coordinates(0, 0)); - const uint8_t *input_top_ptr = _input->buffer() + _input->info()->offset_element_in_bytes(Coordinates(0, 1)); - const uint8_t *input_mid_ptr = _input->buffer() + _input->info()->offset_element_in_bytes(Coordinates(0, 2)); - const uint8_t *input_low_ptr = _input->buffer() + _input->info()->offset_element_in_bytes(Coordinates(0, 3)); - const uint8_t *input_low2_ptr = _input->buffer() + _input->info()->offset_element_in_bytes(Coordinates(0, 4)); - - execute_window_loop(window, [&](const Coordinates &) - { - // Low data - const uint16x8_t data_low_t2 = vreinterpretq_u16_s16(vld1q_s16(reinterpret_cast(input_top2_ptr + in.offset()))); - const uint16x8_t data_low_t1 = vreinterpretq_u16_s16(vld1q_s16(reinterpret_cast(input_top_ptr + in.offset()))); - const uint16x8_t data_low_m = vreinterpretq_u16_s16(vld1q_s16(reinterpret_cast(input_mid_ptr + in.offset()))); - const uint16x8_t data_low_b1 = vreinterpretq_u16_s16(vld1q_s16(reinterpret_cast(input_low_ptr + in.offset()))); - const uint16x8_t data_low_b2 = vreinterpretq_u16_s16(vld1q_s16(reinterpret_cast(input_low2_ptr + in.offset()))); - - uint16x8_t out_low = vaddq_u16(data_low_t2, data_low_b2); - out_low = vmlaq_u16(out_low, data_low_t1, four); - out_low = vmlaq_u16(out_low, data_low_m, six); - out_low = vmlaq_u16(out_low, data_low_b1, four); - - in.increment(Window::DimX); - - // High data - const uint16x8_t data_high_t2 = vreinterpretq_u16_s16(vld1q_s16(reinterpret_cast(input_top2_ptr + in.offset()))); - const uint16x8_t data_high_t1 = vreinterpretq_u16_s16(vld1q_s16(reinterpret_cast(input_top_ptr + in.offset()))); - const uint16x8_t data_high_m = vreinterpretq_u16_s16(vld1q_s16(reinterpret_cast(input_mid_ptr + in.offset()))); - const uint16x8_t data_high_b1 = vreinterpretq_u16_s16(vld1q_s16(reinterpret_cast(input_low_ptr + in.offset()))); - const uint16x8_t data_high_b2 = vreinterpretq_u16_s16(vld1q_s16(reinterpret_cast(input_low2_ptr + in.offset()))); - - uint16x8_t out_high = vaddq_u16(data_high_t2, data_high_b2); - out_high = vmlaq_u16(out_high, data_high_t1, four); - out_high = vmlaq_u16(out_high, data_high_m, six); - out_high = vmlaq_u16(out_high, data_high_b1, four); - - vst1q_u8(out.ptr(), vcombine_u8(vqshrn_n_u16(out_low, 8), vqshrn_n_u16(out_high, 8))); - }, - in, out); -} diff --git a/src/core/NEON/kernels/NEGaussianPyramidKernel.h b/src/core/NEON/kernels/NEGaussianPyramidKernel.h deleted file mode 100644 index d943990535..0000000000 --- a/src/core/NEON/kernels/NEGaussianPyramidKernel.h +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright (c) 2016-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEGAUSSIANPYRAMIDKERNEL_H -#define ARM_COMPUTE_NEGAUSSIANPYRAMIDKERNEL_H - -#include "src/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Neon kernel to perform a GaussianPyramid (horizontal pass) */ -class NEGaussianPyramidHorKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEGaussianPyramidHorKernel"; - } - /** Default constructor */ - NEGaussianPyramidHorKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGaussianPyramidHorKernel(NEGaussianPyramidHorKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGaussianPyramidHorKernel &operator=(NEGaussianPyramidHorKernel &) = delete; - /** Allow instances of this class to be moved */ - NEGaussianPyramidHorKernel(NEGaussianPyramidHorKernel &&) = default; - /** Allow instances of this class to be moved */ - NEGaussianPyramidHorKernel &operator=(NEGaussianPyramidHorKernel &&) = default; - /** Default destructor */ - ~NEGaussianPyramidHorKernel() = default; - - /** Initialise the kernel's source, destination and border mode. - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] output Destination tensor. Output should have half the input width. Data type supported: S16. - */ - void configure(const ITensor *input, ITensor *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - int _l2_load_offset; -}; - -/** Neon kernel to perform a GaussianPyramid (vertical pass) */ -class NEGaussianPyramidVertKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEGaussianPyramidVertKernel"; - } - /** Default constructor */ - NEGaussianPyramidVertKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGaussianPyramidVertKernel(NEGaussianPyramidVertKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGaussianPyramidVertKernel &operator=(NEGaussianPyramidVertKernel &) = delete; - /** Allow instances of this class to be moved */ - NEGaussianPyramidVertKernel(NEGaussianPyramidVertKernel &&) = default; - /** Allow instances of this class to be moved */ - NEGaussianPyramidVertKernel &operator=(NEGaussianPyramidVertKernel &&) = default; - /** Default destructor */ - ~NEGaussianPyramidVertKernel() = default; - - /** Initialise the kernel's source, destination and border mode. - * - * @param[in] input Source tensor. Data type supported: S16. - * @param[out] output Destination tensor. Output should have half the input height. Data type supported: U8. - */ - void configure(const ITensor *input, ITensor *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - int _t2_load_offset; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEGAUSSIANPYRAMIDKERNEL_H */ diff --git a/src/core/NEON/kernels/NEHOGDescriptorKernel.cpp b/src/core/NEON/kernels/NEHOGDescriptorKernel.cpp deleted file mode 100644 index 089cd34e0c..0000000000 --- a/src/core/NEON/kernels/NEHOGDescriptorKernel.cpp +++ /dev/null @@ -1,806 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/NEON/kernels/NEHOGDescriptorKernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/HOGInfo.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/IAccessWindow.h" -#include "arm_compute/core/Validate.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" - -#include -#include -#include - -using namespace arm_compute; - -namespace -{ -void cell_width_lt8(const int16_t *__restrict mag_row_ptr, const uint8_t *__restrict phase_row_ptr, float *__restrict output_ptr, - size_t mag_stride, size_t phase_stride, size_t cell_width, size_t cell_height, size_t num_bins, float phase_scale) -{ - const float32x4_t scale_f32 = vdupq_n_f32(phase_scale); - static const float32x4_t one_f32 = vdupq_n_f32(1.0f); - static const float32x4_t zerofive_f32 = vdupq_n_f32(0.5f); - static const int32x4_t zero_s32 = vdupq_n_s32(0); - static const int32x4_t one_s32 = vdupq_n_s32(1); - const int32x4_t num_bins_s32 = vdupq_n_s32(num_bins); - - memset(output_ptr, 0, sizeof(float) * num_bins); - - for(size_t yc = 0; yc < cell_height; ++yc) - { - int32_t xc = 0; - - for(; xc <= static_cast(cell_width) - 4; xc += 4) - { - // Load magnitude and phase values - const uint8x8_t phase_u8 = vld1_u8(phase_row_ptr + xc + yc * phase_stride); - const int16x4_t mag_s16 = vld1_s16(mag_row_ptr + xc + yc * mag_stride); - - // Convert magnitude and phase to float - const float32x4_t mag_f32 = vcvtq_f32_s32(vmovl_s16(mag_s16)); - float32x4_t phase_f32 = vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(phase_u8)))); - - // Scale phase: phase * scale + 0.5f - phase_f32 = vmlaq_f32(zerofive_f32, phase_f32, scale_f32); - - // Compute histogram index. - int32x4_t hidx_s32 = vcvtq_s32_f32(phase_f32); - - // Compute magnitude weights (w0 and w1) - const float32x4_t hidx_f32 = vcvtq_f32_s32(hidx_s32); - - // w1 = phase_f32 - hidx_f32 - const float32x4_t w1_f32 = vsubq_f32(phase_f32, hidx_f32); - - // w0 = 1.0 - w1 - const float32x4_t w0_f32 = vsubq_f32(one_f32, w1_f32); - - // Compute contribute for splitting vote - const float32x4_t mag_w0_f32 = vmulq_f32(mag_f32, w0_f32); - const float32x4_t mag_w1_f32 = vmulq_f32(mag_f32, w1_f32); - - // Weighted vote between 2 bins - - // Check if the histogram index is equal to num_bins. If so, replace the index with 0 - uint32x4_t mask = vceqq_s32(hidx_s32, num_bins_s32); - hidx_s32 = vbslq_s32(mask, zero_s32, hidx_s32); - - // Bin 0 - *(output_ptr + vgetq_lane_s32(hidx_s32, 0)) += vgetq_lane_f32(mag_w0_f32, 0); - *(output_ptr + vgetq_lane_s32(hidx_s32, 1)) += vgetq_lane_f32(mag_w0_f32, 1); - *(output_ptr + vgetq_lane_s32(hidx_s32, 2)) += vgetq_lane_f32(mag_w0_f32, 2); - *(output_ptr + vgetq_lane_s32(hidx_s32, 3)) += vgetq_lane_f32(mag_w0_f32, 3); - - hidx_s32 = vaddq_s32(hidx_s32, one_s32); - - // Check if the histogram index is equal to num_bins - mask = vceqq_s32(hidx_s32, num_bins_s32); - hidx_s32 = vbslq_s32(mask, zero_s32, hidx_s32); - - // Bin1 - *(output_ptr + vgetq_lane_s32(hidx_s32, 0)) += vgetq_lane_f32(mag_w1_f32, 0); - *(output_ptr + vgetq_lane_s32(hidx_s32, 1)) += vgetq_lane_f32(mag_w1_f32, 1); - *(output_ptr + vgetq_lane_s32(hidx_s32, 2)) += vgetq_lane_f32(mag_w1_f32, 2); - *(output_ptr + vgetq_lane_s32(hidx_s32, 3)) += vgetq_lane_f32(mag_w1_f32, 3); - } - - for(; xc < static_cast(cell_width); ++xc) - { - const float phase_value = *(phase_row_ptr + xc + yc * phase_stride) * phase_scale + 0.5f; - const float mag_value = *(mag_row_ptr + xc + yc * mag_stride); - - const float w1 = phase_value - std::floor(phase_value); - - // The quantised phase is the histogram index [0, num_bins - 1] - Round - // Check limit of histogram index. If hidx == num_bins, hidx = 0 - const auto hidx = static_cast(phase_value) % num_bins; - - // Weighted vote between 2 bins - *(output_ptr + hidx) += mag_value * (1.0f - w1); - *(output_ptr + ((hidx + 1) % (num_bins))) += mag_value * w1; - } - } -} - -void cell_width_ge8(const int16_t *__restrict mag_row_ptr, const uint8_t *__restrict phase_row_ptr, float *__restrict output_ptr, size_t mag_stride, size_t phase_stride, size_t cell_width, - size_t cell_height, size_t num_bins, float phase_scale) -{ - const float32x4_t scale_f32 = vdupq_n_f32(phase_scale); - static const float32x4_t one_f32 = vdupq_n_f32(1.0f); - static const float32x4_t zerofive_f32 = vdupq_n_f32(0.5f); - static const int32x4_t zero_s32 = vdupq_n_s32(0); - static const int32x4_t one_s32 = vdupq_n_s32(1); - const int32x4_t num_bins_s32 = vdupq_n_s32(num_bins); - - memset(output_ptr, 0, sizeof(float) * num_bins); - - for(size_t yc = 0; yc < cell_height; ++yc) - { - int32_t xc = 0; - - for(; xc <= static_cast(cell_width) - 8; xc += 8) - { - // Load magnitude and phase values - const uint8x8_t phase_u8 = vld1_u8(phase_row_ptr + xc + yc * phase_stride); - const int16x8_t mag_s16 = vld1q_s16(mag_row_ptr + xc + yc * mag_stride); - - // Convert phase to U16 - const uint16x8_t phase_u16 = vmovl_u8(phase_u8); - - // Convert magnitude to float32 - const float32x4x2_t mag_f32 = - { - { - vcvtq_f32_s32(vmovl_s16(vget_low_s16(mag_s16))), - vcvtq_f32_s32(vmovl_s16(vget_high_s16(mag_s16))) - } - }; - - // Convert phase to float32 - float32x4x2_t phase_f32 = - { - { - vcvtq_f32_u32(vmovl_u16(vget_low_u16(phase_u16))), - vcvtq_f32_u32(vmovl_u16(vget_high_u16(phase_u16))) - } - }; - - // Scale phase: phase * scale + 0.5f - phase_f32.val[0] = vmlaq_f32(zerofive_f32, phase_f32.val[0], scale_f32); - phase_f32.val[1] = vmlaq_f32(zerofive_f32, phase_f32.val[1], scale_f32); - - // Compute histogram index. - int32x4x2_t hidx_s32 = - { - { - vcvtq_s32_f32(phase_f32.val[0]), - vcvtq_s32_f32(phase_f32.val[1]) - } - }; - - // Compute magnitude weights (w0 and w1) - const float32x4x2_t hidx_f32 = - { - { - vcvtq_f32_s32(hidx_s32.val[0]), - vcvtq_f32_s32(hidx_s32.val[1]) - } - }; - - float32x4x2_t w1_f32 = - { - { - vsubq_f32(phase_f32.val[0], hidx_f32.val[0]), - vsubq_f32(phase_f32.val[1], hidx_f32.val[1]) - } - }; - - float32x4x2_t w0_f32 = - { - { - vsubq_f32(one_f32, w1_f32.val[0]), - vsubq_f32(one_f32, w1_f32.val[1]) - } - }; - - // Compute contribute for splitting vote - const float32x4x2_t mag_w0_f32 = - { - { - vmulq_f32(mag_f32.val[0], w0_f32.val[0]), - vmulq_f32(mag_f32.val[1], w0_f32.val[1]) - } - }; - - const float32x4x2_t mag_w1_f32 = - { - { - vmulq_f32(mag_f32.val[0], w1_f32.val[0]), - vmulq_f32(mag_f32.val[1], w1_f32.val[1]) - } - }; - - // Weighted vote between 2 bins - - // Check if the histogram index is equal to num_bins - uint32x4x2_t mask = - { - { - vceqq_s32(hidx_s32.val[0], num_bins_s32), - vceqq_s32(hidx_s32.val[1], num_bins_s32) - } - }; - - hidx_s32.val[0] = vbslq_s32(mask.val[0], zero_s32, hidx_s32.val[0]); - hidx_s32.val[1] = vbslq_s32(mask.val[1], zero_s32, hidx_s32.val[1]); - - // First bin - Low - *(output_ptr + vgetq_lane_s32(hidx_s32.val[0], 0)) += vgetq_lane_f32(mag_w0_f32.val[0], 0); - *(output_ptr + vgetq_lane_s32(hidx_s32.val[0], 1)) += vgetq_lane_f32(mag_w0_f32.val[0], 1); - *(output_ptr + vgetq_lane_s32(hidx_s32.val[0], 2)) += vgetq_lane_f32(mag_w0_f32.val[0], 2); - *(output_ptr + vgetq_lane_s32(hidx_s32.val[0], 3)) += vgetq_lane_f32(mag_w0_f32.val[0], 3); - - // First bin - high - *(output_ptr + vgetq_lane_s32(hidx_s32.val[1], 0)) += vgetq_lane_f32(mag_w0_f32.val[1], 0); - *(output_ptr + vgetq_lane_s32(hidx_s32.val[1], 1)) += vgetq_lane_f32(mag_w0_f32.val[1], 1); - *(output_ptr + vgetq_lane_s32(hidx_s32.val[1], 2)) += vgetq_lane_f32(mag_w0_f32.val[1], 2); - *(output_ptr + vgetq_lane_s32(hidx_s32.val[1], 3)) += vgetq_lane_f32(mag_w0_f32.val[1], 3); - - hidx_s32.val[0] = vaddq_s32(hidx_s32.val[0], one_s32); - hidx_s32.val[1] = vaddq_s32(hidx_s32.val[1], one_s32); - - // Check if the histogram index is equal to num_bins - mask.val[0] = vceqq_s32(hidx_s32.val[0], num_bins_s32); - mask.val[1] = vceqq_s32(hidx_s32.val[1], num_bins_s32); - - hidx_s32.val[0] = vbslq_s32(mask.val[0], zero_s32, hidx_s32.val[0]); - hidx_s32.val[1] = vbslq_s32(mask.val[1], zero_s32, hidx_s32.val[1]); - - // Second bin - Low - *(output_ptr + vgetq_lane_s32(hidx_s32.val[0], 0)) += vgetq_lane_f32(mag_w1_f32.val[0], 0); - *(output_ptr + vgetq_lane_s32(hidx_s32.val[0], 1)) += vgetq_lane_f32(mag_w1_f32.val[0], 1); - *(output_ptr + vgetq_lane_s32(hidx_s32.val[0], 2)) += vgetq_lane_f32(mag_w1_f32.val[0], 2); - *(output_ptr + vgetq_lane_s32(hidx_s32.val[0], 3)) += vgetq_lane_f32(mag_w1_f32.val[0], 3); - - // Second bin - high - *(output_ptr + vgetq_lane_s32(hidx_s32.val[1], 0)) += vgetq_lane_f32(mag_w1_f32.val[1], 0); - *(output_ptr + vgetq_lane_s32(hidx_s32.val[1], 1)) += vgetq_lane_f32(mag_w1_f32.val[1], 1); - *(output_ptr + vgetq_lane_s32(hidx_s32.val[1], 2)) += vgetq_lane_f32(mag_w1_f32.val[1], 2); - *(output_ptr + vgetq_lane_s32(hidx_s32.val[1], 3)) += vgetq_lane_f32(mag_w1_f32.val[1], 3); - } - - for(; xc < static_cast(cell_width); xc++) - { - const float phase_value = *(phase_row_ptr + xc + yc * phase_stride) * phase_scale + 0.5f; - const float mag_value = *(mag_row_ptr + xc + yc * mag_stride); - - const float w1 = phase_value - std::floor(phase_value); - - // The quantised phase is the histogram index [0, num_bins - 1] - Round - // Check limit of histogram index. If hidx == num_bins, hidx = 0 - const size_t hidx = static_cast(phase_value) % num_bins; - - // Weighted vote between 2 bins - *(output_ptr + hidx) += mag_value * (1.0f - w1); - *(output_ptr + ((hidx + 1) % (num_bins))) += mag_value * w1; - } - } -} - -void l2_norm(const float *__restrict input_row_ptr, float *__restrict output_ptr, size_t input_stride, - size_t num_cells_per_block_height, size_t num_bins_block_x, size_t num_bins_block, float l2_hyst_threshold) -{ - ARM_COMPUTE_UNUSED(l2_hyst_threshold); - - float sum = 0.0f; - float32x4_t sum_f32 = vdupq_n_f32(0.0f); - - // Compute L2-Norm - for(size_t yc = 0; yc < num_cells_per_block_height; ++yc) - { - const float *const hist_ptr = input_row_ptr + yc * input_stride; - - int32_t xc = 0; - - for(; xc <= static_cast(num_bins_block_x) - 16; xc += 16) - { - const float32x4x4_t input_value = - { - { - vld1q_f32(hist_ptr + xc + 0), - vld1q_f32(hist_ptr + xc + 4), - vld1q_f32(hist_ptr + xc + 8), - vld1q_f32(hist_ptr + xc + 12) - } - }; - - // Compute input_value^2 - sum_f32 = vmlaq_f32(sum_f32, input_value.val[0], input_value.val[0]); - sum_f32 = vmlaq_f32(sum_f32, input_value.val[1], input_value.val[1]); - sum_f32 = vmlaq_f32(sum_f32, input_value.val[2], input_value.val[2]); - sum_f32 = vmlaq_f32(sum_f32, input_value.val[3], input_value.val[3]); - - vst1q_f32(&output_ptr[xc + 0 + yc * num_bins_block_x], input_value.val[0]); - vst1q_f32(&output_ptr[xc + 4 + yc * num_bins_block_x], input_value.val[1]); - vst1q_f32(&output_ptr[xc + 8 + yc * num_bins_block_x], input_value.val[2]); - vst1q_f32(&output_ptr[xc + 12 + yc * num_bins_block_x], input_value.val[3]); - } - - // Compute left over - for(; xc < static_cast(num_bins_block_x); xc++) - { - const float input_value = hist_ptr[xc]; - - sum += input_value * input_value; - - output_ptr[xc + yc * num_bins_block_x] = input_value; - } - } - - sum += vgetq_lane_f32(sum_f32, 0); - sum += vgetq_lane_f32(sum_f32, 1); - sum += vgetq_lane_f32(sum_f32, 2); - sum += vgetq_lane_f32(sum_f32, 3); - - const float scale = 1.0f / (std::sqrt(sum) + num_bins_block * 0.1f); - const float32x4_t scale_f32 = vdupq_n_f32(scale); - - int32_t i = 0; - - for(; i <= static_cast(num_bins_block) - 16; i += 16) - { - float32x4x4_t input_value = - { - { - vld1q_f32(&output_ptr[i + 0]), - vld1q_f32(&output_ptr[i + 4]), - vld1q_f32(&output_ptr[i + 8]), - vld1q_f32(&output_ptr[i + 12]) - } - }; - - // Scale input_value - input_value.val[0] = vmulq_f32(input_value.val[0], scale_f32); - input_value.val[1] = vmulq_f32(input_value.val[1], scale_f32); - input_value.val[2] = vmulq_f32(input_value.val[2], scale_f32); - input_value.val[3] = vmulq_f32(input_value.val[3], scale_f32); - - vst1q_f32(&output_ptr[i + 0], input_value.val[0]); - vst1q_f32(&output_ptr[i + 4], input_value.val[1]); - vst1q_f32(&output_ptr[i + 8], input_value.val[2]); - vst1q_f32(&output_ptr[i + 12], input_value.val[3]); - } - - for(; i < static_cast(num_bins_block); ++i) - { - output_ptr[i] *= scale; - } -} - -void l2hys_norm(const float *__restrict input_row_ptr, float *__restrict output_ptr, size_t input_stride, size_t num_cells_per_block_height, size_t num_bins_block_x, size_t num_bins_block, - float l2_hyst_threshold) -{ - float sum = 0.0f; - float32x4_t sum_f32 = vdupq_n_f32(0.0f); - - // Compute L2-Hys - for(size_t yc = 0; yc < num_cells_per_block_height; ++yc) - { - const float *const hist_ptr = input_row_ptr + yc * input_stride; - - int32_t xc = 0; - - for(; xc <= static_cast(num_bins_block_x) - 16; xc += 16) - { - const float32x4x4_t input_value = - { - { - vld1q_f32(hist_ptr + xc + 0), - vld1q_f32(hist_ptr + xc + 4), - vld1q_f32(hist_ptr + xc + 8), - vld1q_f32(hist_ptr + xc + 12) - } - }; - - // Compute input_value^2 - sum_f32 = vmlaq_f32(sum_f32, input_value.val[0], input_value.val[0]); - sum_f32 = vmlaq_f32(sum_f32, input_value.val[1], input_value.val[1]); - sum_f32 = vmlaq_f32(sum_f32, input_value.val[2], input_value.val[2]); - sum_f32 = vmlaq_f32(sum_f32, input_value.val[3], input_value.val[3]); - - vst1q_f32(&output_ptr[xc + 0 + yc * num_bins_block_x], input_value.val[0]); - vst1q_f32(&output_ptr[xc + 4 + yc * num_bins_block_x], input_value.val[1]); - vst1q_f32(&output_ptr[xc + 8 + yc * num_bins_block_x], input_value.val[2]); - vst1q_f32(&output_ptr[xc + 12 + yc * num_bins_block_x], input_value.val[3]); - } - - // Compute left over - for(; xc < static_cast(num_bins_block_x); ++xc) - { - const float input_value = hist_ptr[xc]; - - sum += input_value * input_value; - - output_ptr[xc + yc * num_bins_block_x] = input_value; - } - } - - sum += vgetq_lane_f32(sum_f32, 0); - sum += vgetq_lane_f32(sum_f32, 1); - sum += vgetq_lane_f32(sum_f32, 2); - sum += vgetq_lane_f32(sum_f32, 3); - - float scale = 1.0f / (std::sqrt(sum) + num_bins_block * 0.1f); - float32x4_t scale_f32 = vdupq_n_f32(scale); - const float32x4_t l2_hyst_threshold_f32 = vdupq_n_f32(l2_hyst_threshold); - - // Reset sum - sum_f32 = vdupq_n_f32(0.0f); - sum = 0.0f; - - int32_t i = 0; - - for(; i <= static_cast(num_bins_block) - 16; i += 16) - { - float32x4x4_t input_value = - { - { - vld1q_f32(&output_ptr[i + 0]), - vld1q_f32(&output_ptr[i + 4]), - vld1q_f32(&output_ptr[i + 8]), - vld1q_f32(&output_ptr[i + 12]) - } - }; - - // Scale input_value - input_value.val[0] = vmulq_f32(input_value.val[0], scale_f32); - input_value.val[1] = vmulq_f32(input_value.val[1], scale_f32); - input_value.val[2] = vmulq_f32(input_value.val[2], scale_f32); - input_value.val[3] = vmulq_f32(input_value.val[3], scale_f32); - - // Clip input_value if over _threshold_l2hys - input_value.val[0] = vminq_f32(input_value.val[0], l2_hyst_threshold_f32); - input_value.val[1] = vminq_f32(input_value.val[1], l2_hyst_threshold_f32); - input_value.val[2] = vminq_f32(input_value.val[2], l2_hyst_threshold_f32); - input_value.val[3] = vminq_f32(input_value.val[3], l2_hyst_threshold_f32); - - // Compute input_value^2 - sum_f32 = vmlaq_f32(sum_f32, input_value.val[0], input_value.val[0]); - sum_f32 = vmlaq_f32(sum_f32, input_value.val[1], input_value.val[1]); - sum_f32 = vmlaq_f32(sum_f32, input_value.val[2], input_value.val[2]); - sum_f32 = vmlaq_f32(sum_f32, input_value.val[3], input_value.val[3]); - - vst1q_f32(&output_ptr[i + 0], input_value.val[0]); - vst1q_f32(&output_ptr[i + 4], input_value.val[1]); - vst1q_f32(&output_ptr[i + 8], input_value.val[2]); - vst1q_f32(&output_ptr[i + 12], input_value.val[3]); - } - - sum += vgetq_lane_f32(sum_f32, 0); - sum += vgetq_lane_f32(sum_f32, 1); - sum += vgetq_lane_f32(sum_f32, 2); - sum += vgetq_lane_f32(sum_f32, 3); - - for(; i < static_cast(num_bins_block); ++i) - { - float input_value = output_ptr[i] * scale; - - // Clip scaled input_value if over _threshold_L2hys - input_value = std::min(input_value, l2_hyst_threshold); - - sum += input_value * input_value; - - output_ptr[i] = input_value; - } - - // We use the same constants of OpenCV - scale = 1.0f / (std::sqrt(sum) + 1e-3f); - scale_f32 = vdupq_n_f32(scale); - - // Rescale - i = 0; - - for(; i <= static_cast(num_bins_block) - 16; i += 16) - { - float32x4x4_t input_value = - { - { - vld1q_f32(&output_ptr[i + 0]), - vld1q_f32(&output_ptr[i + 4]), - vld1q_f32(&output_ptr[i + 8]), - vld1q_f32(&output_ptr[i + 12]) - } - }; - - // Scale input_value - input_value.val[0] = vmulq_f32(input_value.val[0], scale_f32); - input_value.val[1] = vmulq_f32(input_value.val[1], scale_f32); - input_value.val[2] = vmulq_f32(input_value.val[2], scale_f32); - input_value.val[3] = vmulq_f32(input_value.val[3], scale_f32); - - vst1q_f32(&output_ptr[i + 0], input_value.val[0]); - vst1q_f32(&output_ptr[i + 4], input_value.val[1]); - vst1q_f32(&output_ptr[i + 8], input_value.val[2]); - vst1q_f32(&output_ptr[i + 12], input_value.val[3]); - } - - for(; i < static_cast(num_bins_block); ++i) - { - // Store result - output_ptr[i] *= scale; - } -} - -void l1_norm(const float *__restrict input_row_ptr, float *__restrict output_ptr, size_t input_stride, size_t num_cells_per_block_height, size_t num_bins_block_x, size_t num_bins_block, - float l2_hyst_threshold) -{ - ARM_COMPUTE_UNUSED(l2_hyst_threshold); - - float sum = 0.0f; - float32x4_t sum_f32 = vdupq_n_f32(0.0f); - - // Compute L1-Norm - for(size_t yc = 0; yc < num_cells_per_block_height; ++yc) - { - const float *const hist_ptr = input_row_ptr + yc * input_stride; - - int32_t xc = 0; - - for(; xc <= static_cast(num_bins_block_x) - 16; xc += 16) - { - const float32x4x4_t input_value = - { - { - vld1q_f32(hist_ptr + xc + 0), - vld1q_f32(hist_ptr + xc + 4), - vld1q_f32(hist_ptr + xc + 8), - vld1q_f32(hist_ptr + xc + 12) - } - }; - - // Compute |input_value| - sum_f32 += vabsq_f32(input_value.val[0]); - sum_f32 += vabsq_f32(input_value.val[1]); - sum_f32 += vabsq_f32(input_value.val[2]); - sum_f32 += vabsq_f32(input_value.val[3]); - - vst1q_f32(&output_ptr[xc + 0 + yc * num_bins_block_x], input_value.val[0]); - vst1q_f32(&output_ptr[xc + 4 + yc * num_bins_block_x], input_value.val[1]); - vst1q_f32(&output_ptr[xc + 8 + yc * num_bins_block_x], input_value.val[2]); - vst1q_f32(&output_ptr[xc + 12 + yc * num_bins_block_x], input_value.val[3]); - } - - for(; xc < static_cast(num_bins_block_x); xc++) - { - const float input_value = hist_ptr[xc]; - - sum += std::abs(input_value); - - output_ptr[xc + yc * num_bins_block_x] = input_value; - } - } - - sum += vgetq_lane_f32(sum_f32, 0); - sum += vgetq_lane_f32(sum_f32, 1); - sum += vgetq_lane_f32(sum_f32, 2); - sum += vgetq_lane_f32(sum_f32, 3); - - const float scale = 1.0f / (std::sqrt(sum) + num_bins_block * 0.1f); - const float32x4_t scale_f32 = vdupq_n_f32(scale); - - int32_t i = 0; - - for(; i <= static_cast(num_bins_block) - 16; i += 16) - { - float32x4x4_t input_value = - { - { - vld1q_f32(&output_ptr[i + 0]), - vld1q_f32(&output_ptr[i + 4]), - vld1q_f32(&output_ptr[i + 8]), - vld1q_f32(&output_ptr[i + 12]) - } - }; - - // Scale input_value - input_value.val[0] = vmulq_f32(input_value.val[0], scale_f32); - input_value.val[1] = vmulq_f32(input_value.val[1], scale_f32); - input_value.val[2] = vmulq_f32(input_value.val[2], scale_f32); - input_value.val[3] = vmulq_f32(input_value.val[3], scale_f32); - - vst1q_f32(&output_ptr[i + 0], input_value.val[0]); - vst1q_f32(&output_ptr[i + 4], input_value.val[1]); - vst1q_f32(&output_ptr[i + 8], input_value.val[2]); - vst1q_f32(&output_ptr[i + 12], input_value.val[3]); - } - - for(; i < static_cast(num_bins_block); ++i) - { - output_ptr[i] *= scale; - } -} -} // namespace - -NEHOGOrientationBinningKernel::NEHOGOrientationBinningKernel() - : _func(nullptr), _input_magnitude(nullptr), _input_phase(nullptr), _output(nullptr), _cell_width(0), _cell_height(0), _num_bins(0), _phase_scale(0) -{ -} - -void NEHOGOrientationBinningKernel::configure(const ITensor *input_magnitude, const ITensor *input_phase, ITensor *output, const HOGInfo *hog_info) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input_magnitude, 1, DataType::S16); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input_phase, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON(hog_info == nullptr); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, hog_info->num_bins(), DataType::F32); - ARM_COMPUTE_ERROR_ON(input_magnitude->info()->dimension(Window::DimX) != input_phase->info()->dimension(Window::DimX)); - ARM_COMPUTE_ERROR_ON(input_magnitude->info()->dimension(Window::DimY) != input_phase->info()->dimension(Window::DimY)); - - _input_magnitude = input_magnitude; - _input_phase = input_phase; - _output = output; - _cell_width = hog_info->cell_size().width; - _cell_height = hog_info->cell_size().height; - _num_bins = hog_info->num_bins(); - _phase_scale = (PhaseType::SIGNED == hog_info->phase_type() ? _num_bins / 360.0f : _num_bins / 180.0f); - _phase_scale *= (PhaseType::SIGNED == hog_info->phase_type() ? 360.0f / 255.0f : 1.0f); - - if(_cell_width < 8) - { - _func = &cell_width_lt8; - } - else - { - _func = &cell_width_ge8; - } - - constexpr unsigned int num_elems_processed_per_iteration = 1; - const unsigned int num_elems_read_per_iteration = 1; - const unsigned int num_rows_read_per_iteration = _cell_height; - const unsigned int num_elems_written_per_iteration = 1; - - // Configure kernel window - Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration)); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration); - - update_window_and_padding(win, - AccessWindowRectangle(input_magnitude->info(), 0, 0, num_elems_read_per_iteration, num_rows_read_per_iteration), - AccessWindowRectangle(input_phase->info(), 0, 0, num_elems_read_per_iteration, num_rows_read_per_iteration), - output_access); - - output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape())); - - INEKernel::configure(win); -} - -void NEHOGOrientationBinningKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - ARM_COMPUTE_ERROR_ON(_func == nullptr); - - const size_t mag_stride = _input_magnitude->info()->strides_in_bytes()[Window::DimY] / pixel_size_from_format(_input_magnitude->info()->format()); - const size_t phase_stride = _input_phase->info()->strides_in_bytes()[Window::DimY] / pixel_size_from_format(_input_phase->info()->format()); - - Window win_mag(window); - win_mag.set(Window::DimX, Window::Dimension(window.x().start() * _cell_width, window.x().start() * _cell_width, _cell_width)); - win_mag.set(Window::DimY, Window::Dimension(window.y().start() * _cell_height, window.y().start() * _cell_height, _cell_height)); - - Window win_phase(win_mag); - - Iterator mag(_input_magnitude, win_mag); - Iterator phase(_input_phase, win_phase); - Iterator out(_output, window); - - execute_window_loop(window, [&](const Coordinates &) - { - const auto mag_row_ptr = reinterpret_cast(mag.ptr()); - const auto phase_row_ptr = reinterpret_cast(phase.ptr()); - const auto out_row_ptr = reinterpret_cast(out.ptr()); - - (*_func)(mag_row_ptr, phase_row_ptr, out_row_ptr, mag_stride, phase_stride, _cell_width, _cell_height, _num_bins, _phase_scale); - }, - mag, phase, out); -} - -NEHOGBlockNormalizationKernel::NEHOGBlockNormalizationKernel() - : _func(nullptr), _input(nullptr), _output(nullptr), _num_cells_per_block(), _num_cells_per_block_stride(), _num_bins(0), _l2_hyst_threshold(0.0f) -{ -} - -void NEHOGBlockNormalizationKernel::configure(const ITensor *input, ITensor *output, const HOGInfo *hog_info) -{ - ARM_COMPUTE_ERROR_ON(hog_info == nullptr); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, hog_info->num_bins(), DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_NOT_IN(output, DataType::F32); - - // Number of cells per block - const Size2D num_cells_per_block(hog_info->block_size().width / hog_info->cell_size().width, - hog_info->block_size().height / hog_info->cell_size().height); - - // Number of cells per block stride - const Size2D num_cells_per_block_stride(hog_info->block_stride().width / hog_info->cell_size().width, - hog_info->block_stride().height / hog_info->cell_size().height); - - _input = input; - _output = output; - _l2_hyst_threshold = hog_info->l2_hyst_threshold(); - _num_cells_per_block = num_cells_per_block; - _num_cells_per_block_stride = num_cells_per_block_stride; - _num_bins = hog_info->num_bins(); - - ARM_COMPUTE_ERROR_ON((output->info()->num_channels() != (_num_bins * num_cells_per_block.width * num_cells_per_block.height))); - - switch(hog_info->normalization_type()) - { - case HOGNormType::L2_NORM: - _func = &l2_norm; - break; - case HOGNormType::L2HYS_NORM: - _func = &l2hys_norm; - break; - case HOGNormType::L1_NORM: - _func = &l1_norm; - break; - default: - ARM_COMPUTE_ERROR_ON("Normalisation type not supported"); - break; - } - - constexpr unsigned int num_elems_processed_per_iteration = 1; - const unsigned int num_elems_read_per_iteration = 1; - const unsigned int num_rows_read_per_iteration = _num_cells_per_block.height; - const unsigned int num_elems_written_per_iteration = 1; - const unsigned int num_rows_written_per_iteration = _num_cells_per_block.height; - - // Configure kernel window - Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration)); - AccessWindowRectangle output_access(output->info(), 0, 0, num_elems_written_per_iteration, num_rows_written_per_iteration); - - update_window_and_padding(win, - AccessWindowRectangle(input->info(), 0, 0, num_elems_read_per_iteration, num_rows_read_per_iteration), - output_access); - - output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape())); - - INEKernel::configure(win); -} - -void NEHOGBlockNormalizationKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON(_func == nullptr); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - // Get number of bins per block - const size_t num_bins_per_block = _output->info()->num_channels(); - - // Number of bins on the same row of the block - const int32_t num_bins_per_block_x = _num_cells_per_block.width * _num_bins; - - const size_t input_stride = _input->info()->strides_in_bytes()[Window::DimY] / data_size_from_type(_input->info()->data_type()); - - Window win_in(window); - win_in.set_dimension_step(Window::DimX, _num_cells_per_block_stride.width); - win_in.set(Window::DimY, Window::Dimension(0, 0, 0)); - - Iterator in(_input, win_in); - Iterator out(_output, window); - - // Normalises blocks - execute_window_loop(window, [&](const Coordinates & id) - { - const auto input_row_ptr = reinterpret_cast(in.ptr() + id.y() * _num_cells_per_block_stride.height * _input->info()->strides_in_bytes()[Window::DimY]); - const auto out_row_ptr = reinterpret_cast(out.ptr()); - - // Execute normalization function - (*_func)(input_row_ptr, out_row_ptr, input_stride, _num_cells_per_block.height, num_bins_per_block_x, num_bins_per_block, _l2_hyst_threshold); - }, - in, out); -} diff --git a/src/core/NEON/kernels/NEHOGDescriptorKernel.h b/src/core/NEON/kernels/NEHOGDescriptorKernel.h deleted file mode 100644 index e9cd47b099..0000000000 --- a/src/core/NEON/kernels/NEHOGDescriptorKernel.h +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Copyright (c) 2016-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEHOGDESCRIPTORKERNEL_H -#define ARM_COMPUTE_NEHOGDESCRIPTORKERNEL_H - -#include "arm_compute/core/IHOG.h" -#include "arm_compute/core/Size2D.h" -#include "src/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Neon kernel to perform HOG Orientation Binning */ -class NEHOGOrientationBinningKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEHOGOrientationBinningKernel"; - } - /** Default constructor */ - NEHOGOrientationBinningKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHOGOrientationBinningKernel(const NEHOGOrientationBinningKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHOGOrientationBinningKernel &operator=(const NEHOGOrientationBinningKernel &) = delete; - /** Allow instances of this class to be moved */ - NEHOGOrientationBinningKernel(NEHOGOrientationBinningKernel &&) = default; - /** Allow instances of this class to be moved */ - NEHOGOrientationBinningKernel &operator=(NEHOGOrientationBinningKernel &&) = default; - /** Default destructor */ - ~NEHOGOrientationBinningKernel() = default; - - /** Initialise the kernel's inputs, output and HOG's metadata - * - * @param[in] input_magnitude Input tensor which stores the magnitude of the gradient for each pixel. Data type supported: S16. - * @param[in] input_phase Input tensor which stores the phase of the gradient for each pixel. Data type supported: U8 - * @param[out] output Output tensor which stores the local HOG for each cell. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per cell - * @param[in] hog_info HOG's metadata - */ - void configure(const ITensor *input_magnitude, const ITensor *input_phase, ITensor *output, const HOGInfo *hog_info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Common signature for all the specialised block normalization functions - * - * @param[in] mag_row_ptr Pointer to the first row of the cell in the magnitude tensor - * @param[in] phase_row_ptr Pointer to the first row of the cell in the phase tensor - * @param[out] output_ptr Pointer to the output cell of hog space tensor - * @param[in] mag_stride Stride of the magnitude tensor - * @param[in] phase_stride Stride of the phase tensor - * @param[in] cell_width Width of the cell - * @param[in] cell_height Height of the cell - * @param[in] num_bins Number of bins for each cell - * @param[in] phase_scale Scale factor to apply to the phase in order to calculate the histogram index - */ - using OrientBinFunc = void(const int16_t *__restrict mag_row_ptr, const uint8_t *__restrict phase_row_ptr, float *__restrict output_ptr, size_t mag_stride, size_t phase_stride, size_t cell_width, - size_t cell_height, size_t num_bins, float phase_scale); - /** Orientation binning function to use for the particular cell width passed to configure() */ - OrientBinFunc *_func; - const ITensor *_input_magnitude; - const ITensor *_input_phase; - ITensor *_output; - size_t _cell_width; - size_t _cell_height; - size_t _num_bins; - float _phase_scale; -}; - -/** Neon kernel to perform HOG block normalization */ -class NEHOGBlockNormalizationKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEHOGBlockNormalizationKernel"; - } - /** Default constructor */ - NEHOGBlockNormalizationKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHOGBlockNormalizationKernel(const NEHOGBlockNormalizationKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHOGBlockNormalizationKernel &operator=(const NEHOGBlockNormalizationKernel &) = delete; - /** Allow instances of this class to be moved */ - NEHOGBlockNormalizationKernel(NEHOGBlockNormalizationKernel &&) = default; - /** Allow instances of this class to be moved */ - NEHOGBlockNormalizationKernel &operator=(NEHOGBlockNormalizationKernel &&) = default; - /** Default destructor */ - ~NEHOGBlockNormalizationKernel() = default; - - /** Initialise the kernel's input, output and HOG's metadata - * - * @param[in] input Input tensor which stores the local HOG for each cell. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per cell - * @param[out] output Output tensor which stores the normalised blocks. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block - * @param[in] hog_info HOG's metadata - */ - void configure(const ITensor *input, ITensor *output, const HOGInfo *hog_info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Common signature for all the specialised block normalization functions - * - * @param[in] input_row_ptr Pointer to the first row of the block in the input hog space tensor - * @param[out] output_ptr Pointer to the output block of the hog normalized space - * @param[in] input_stride Stride of the input hog space tensor - * @param[in] num_cells_per_block_height Number of cells per block along the Y direction - * @param[in] num_bins_block_x Number of bins per block along the X direction - * @param[in] num_bins_block Number of total bins per block - * @param[in] l2_hyst_threshold Threshold to use for l2 hysteresis normalization - */ - using BlockNormFunc = void(const float *input_row_ptr, float *output_ptr, size_t input_stride, size_t num_cells_per_block_height, size_t num_bins_block_x, size_t num_bins_block, - float l2_hyst_threshold); - /** Block normalization function to use for the particular normalization type passed to configure() */ - BlockNormFunc *_func; - const ITensor *_input; - ITensor *_output; - Size2D _num_cells_per_block; - Size2D _num_cells_per_block_stride; - size_t _num_bins; - float _l2_hyst_threshold; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEHOGDESCRIPTORKERNEL_H */ diff --git a/src/core/NEON/kernels/NEHOGDetectorKernel.cpp b/src/core/NEON/kernels/NEHOGDetectorKernel.cpp deleted file mode 100644 index cba1d5538a..0000000000 --- a/src/core/NEON/kernels/NEHOGDetectorKernel.cpp +++ /dev/null @@ -1,189 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/NEON/kernels/NEHOGDetectorKernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/HOGInfo.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/IAccessWindow.h" -#include "arm_compute/core/Validate.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" - -#include - -using namespace arm_compute; - -NEHOGDetectorKernel::NEHOGDetectorKernel() - : _input(nullptr), _detection_windows(), _hog_descriptor(nullptr), _bias(0.0f), _threshold(0.0f), _idx_class(0), _num_bins_per_descriptor_x(0), _num_blocks_per_descriptor_y(0), _block_stride_width(0), - _block_stride_height(0), _detection_window_width(0), _detection_window_height(0), _max_num_detection_windows(0), _mutex() -{ -} - -void NEHOGDetectorKernel::configure(const ITensor *input, const IHOG *hog, IDetectionWindowArray *detection_windows, const Size2D &detection_window_stride, float threshold, uint16_t idx_class) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_NOT_IN(input, DataType::F32); - ARM_COMPUTE_ERROR_ON(hog == nullptr); - ARM_COMPUTE_ERROR_ON(detection_windows == nullptr); - ARM_COMPUTE_ERROR_ON((detection_window_stride.width % hog->info()->block_stride().width) != 0); - ARM_COMPUTE_ERROR_ON((detection_window_stride.height % hog->info()->block_stride().height) != 0); - - const Size2D &detection_window_size = hog->info()->detection_window_size(); - const Size2D &block_size = hog->info()->block_size(); - const Size2D &block_stride = hog->info()->block_stride(); - - _input = input; - _detection_windows = detection_windows; - _threshold = threshold; - _idx_class = idx_class; - _hog_descriptor = hog->descriptor(); - _bias = _hog_descriptor[hog->info()->descriptor_size() - 1]; - _num_bins_per_descriptor_x = ((detection_window_size.width - block_size.width) / block_stride.width + 1) * input->info()->num_channels(); - _num_blocks_per_descriptor_y = (detection_window_size.height - block_size.height) / block_stride.height + 1; - _block_stride_width = block_stride.width; - _block_stride_height = block_stride.height; - _detection_window_width = detection_window_size.width; - _detection_window_height = detection_window_size.height; - _max_num_detection_windows = detection_windows->max_num_values(); - - ARM_COMPUTE_ERROR_ON((_num_bins_per_descriptor_x * _num_blocks_per_descriptor_y + 1) != hog->info()->descriptor_size()); - - // Get the number of blocks along the x and y directions of the input tensor - const ValidRegion &valid_region = input->info()->valid_region(); - const size_t num_blocks_x = valid_region.shape[0]; - const size_t num_blocks_y = valid_region.shape[1]; - - // Get the number of blocks along the x and y directions of the detection window - const size_t num_blocks_per_detection_window_x = detection_window_size.width / block_stride.width; - const size_t num_blocks_per_detection_window_y = detection_window_size.height / block_stride.height; - - const size_t window_step_x = detection_window_stride.width / block_stride.width; - const size_t window_step_y = detection_window_stride.height / block_stride.height; - - // Configure kernel window - Window win; - win.set(Window::DimX, Window::Dimension(0, floor_to_multiple(num_blocks_x - num_blocks_per_detection_window_x, window_step_x) + window_step_x, window_step_x)); - win.set(Window::DimY, Window::Dimension(0, floor_to_multiple(num_blocks_y - num_blocks_per_detection_window_y, window_step_y) + window_step_y, window_step_y)); - - constexpr unsigned int num_elems_read_per_iteration = 1; - const unsigned int num_rows_read_per_iteration = _num_blocks_per_descriptor_y; - - update_window_and_padding(win, AccessWindowRectangle(input->info(), 0, 0, num_elems_read_per_iteration, num_rows_read_per_iteration)); - - INEKernel::configure(win); -} - -void NEHOGDetectorKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - ARM_COMPUTE_ERROR_ON(_hog_descriptor == nullptr); - - const size_t in_step_y = _input->info()->strides_in_bytes()[Window::DimY] / data_size_from_type(_input->info()->data_type()); - - Iterator in(_input, window); - - execute_window_loop(window, [&](const Coordinates & id) - { - const auto *in_row_ptr = reinterpret_cast(in.ptr()); - - // Init score_f32 with 0 - float32x4_t score_f32 = vdupq_n_f32(0.0f); - - // Init score with bias - float score = _bias; - - // Compute Linear SVM - for(size_t yb = 0; yb < _num_blocks_per_descriptor_y; ++yb, in_row_ptr += in_step_y) - { - int32_t xb = 0; - - const int32_t offset_y = yb * _num_bins_per_descriptor_x; - - for(; xb < static_cast(_num_bins_per_descriptor_x) - 16; xb += 16) - { - // Load descriptor values - const float32x4x4_t a_f32 = - { - { - vld1q_f32(&in_row_ptr[xb + 0]), - vld1q_f32(&in_row_ptr[xb + 4]), - vld1q_f32(&in_row_ptr[xb + 8]), - vld1q_f32(&in_row_ptr[xb + 12]) - } - }; - - // Load detector values - const float32x4x4_t b_f32 = - { - { - vld1q_f32(&_hog_descriptor[xb + 0 + offset_y]), - vld1q_f32(&_hog_descriptor[xb + 4 + offset_y]), - vld1q_f32(&_hog_descriptor[xb + 8 + offset_y]), - vld1q_f32(&_hog_descriptor[xb + 12 + offset_y]) - } - }; - - // Multiply accumulate - score_f32 = vmlaq_f32(score_f32, a_f32.val[0], b_f32.val[0]); - score_f32 = vmlaq_f32(score_f32, a_f32.val[1], b_f32.val[1]); - score_f32 = vmlaq_f32(score_f32, a_f32.val[2], b_f32.val[2]); - score_f32 = vmlaq_f32(score_f32, a_f32.val[3], b_f32.val[3]); - } - - for(; xb < static_cast(_num_bins_per_descriptor_x); ++xb) - { - const float a = in_row_ptr[xb]; - const float b = _hog_descriptor[xb + offset_y]; - - score += a * b; - } - } - - score += vgetq_lane_f32(score_f32, 0); - score += vgetq_lane_f32(score_f32, 1); - score += vgetq_lane_f32(score_f32, 2); - score += vgetq_lane_f32(score_f32, 3); - - if(score > _threshold) - { - if(_detection_windows->num_values() < _max_num_detection_windows) - { - DetectionWindow win; - win.x = (id.x() * _block_stride_width); - win.y = (id.y() * _block_stride_height); - win.width = _detection_window_width; - win.height = _detection_window_height; - win.idx_class = _idx_class; - win.score = score; - - arm_compute::unique_lock lock(_mutex); - _detection_windows->push_back(win); - lock.unlock(); - } - } - }, - in); -} diff --git a/src/core/NEON/kernels/NEHOGDetectorKernel.h b/src/core/NEON/kernels/NEHOGDetectorKernel.h deleted file mode 100644 index e4c699fbfb..0000000000 --- a/src/core/NEON/kernels/NEHOGDetectorKernel.h +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2016-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEHOGDETECTORKERNEL_H -#define ARM_COMPUTE_NEHOGDETECTORKERNEL_H - -#include "arm_compute/core/IArray.h" -#include "arm_compute/core/IHOG.h" -#include "src/core/NEON/INEKernel.h" -#include "support/Mutex.h" - -namespace arm_compute -{ -class ITensor; - -/** Neon kernel to perform HOG detector kernel using linear SVM */ -class NEHOGDetectorKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEHOGDetectorKernel"; - } - /** Default constructor */ - NEHOGDetectorKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHOGDetectorKernel(const NEHOGDetectorKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHOGDetectorKernel &operator=(const NEHOGDetectorKernel &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEHOGDetectorKernel(NEHOGDetectorKernel &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEHOGDetectorKernel &operator=(NEHOGDetectorKernel &&) = delete; - /** Default destructor */ - ~NEHOGDetectorKernel() = default; - - /** Initialise the kernel's input, HOG data-object, detection window, the stride of the detection window, the threshold and index of the object to detect - * - * @param[in] input Input tensor which stores the HOG descriptor obtained with @ref NEHOGOrientationBinningKernel. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block - * @param[in] hog HOG data object used by @ref NEHOGOrientationBinningKernel and @ref NEHOGBlockNormalizationKernel - * @param[out] detection_windows Array of @ref DetectionWindow. This array stores all the detected objects - * @param[in] detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions. - * It must be multiple of the hog->info()->block_stride() - * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane - * @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to - */ - void configure(const ITensor *input, const IHOG *hog, IDetectionWindowArray *detection_windows, const Size2D &detection_window_stride, float threshold = 0.0f, uint16_t idx_class = 0); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - IDetectionWindowArray *_detection_windows; - const float *_hog_descriptor; - float _bias; - float _threshold; - uint16_t _idx_class; - size_t _num_bins_per_descriptor_x; - size_t _num_blocks_per_descriptor_y; - size_t _block_stride_width; - size_t _block_stride_height; - size_t _detection_window_width; - size_t _detection_window_height; - size_t _max_num_detection_windows; - arm_compute::Mutex _mutex; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEHOGDETECTORKERNEL_H */ diff --git a/src/core/NEON/kernels/NEHarrisCornersKernel.cpp b/src/core/NEON/kernels/NEHarrisCornersKernel.cpp deleted file mode 100644 index 4159e434b2..0000000000 --- a/src/core/NEON/kernels/NEHarrisCornersKernel.cpp +++ /dev/null @@ -1,817 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/NEON/kernels/NEHarrisCornersKernel.h" - -#include "arm_compute/core/Coordinates.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" - -#include -#include -#include -#include - -using namespace arm_compute; - -template class arm_compute::NEHarrisScoreKernel<3>; -template class arm_compute::NEHarrisScoreKernel<5>; -template class arm_compute::NEHarrisScoreKernel<7>; -template arm_compute::NEHarrisScoreKernel<3>::NEHarrisScoreKernel(); -template arm_compute::NEHarrisScoreKernel<5>::NEHarrisScoreKernel(); -template arm_compute::NEHarrisScoreKernel<7>::NEHarrisScoreKernel(); - -namespace -{ -inline float32x4_t harris_score(float32x4_t gx2, float32x4_t gy2, float32x4_t gxgy, float32x4_t sensitivity, float32x4_t strength_thresh) -{ - // Trace^2 - float32x4_t trace2 = vaddq_f32(gx2, gy2); - trace2 = vmulq_f32(trace2, trace2); - - // Det(A) - float32x4_t det = vmulq_f32(gx2, gy2); - det = vmlsq_f32(det, gxgy, gxgy); - - // Det(A) - sensitivity * trace^2 - const float32x4_t mc = vmlsq_f32(det, sensitivity, trace2); - - // mc > strength_thresh - const uint32x4_t mask = vcgtq_f32(mc, strength_thresh); - - return vbslq_f32(mask, mc, vdupq_n_f32(0.0f)); -} - -inline void harris_score1x3_FLOAT_FLOAT_FLOAT(float32x4_t low_gx, float32x4_t low_gy, float32x4_t high_gx, float32x4_t high_gy, float32x4_t &gx2, float32x4_t &gy2, float32x4_t &gxgy, - float32x4_t norm_factor) -{ - // Normalize - low_gx = vmulq_f32(low_gx, norm_factor); - low_gy = vmulq_f32(low_gy, norm_factor); - high_gx = vmulq_f32(high_gx, norm_factor); - high_gy = vmulq_f32(high_gy, norm_factor); - - const float32x4_t l_gx = low_gx; - const float32x4_t l_gy = low_gy; - const float32x4_t m_gx = vextq_f32(low_gx, high_gx, 1); - const float32x4_t m_gy = vextq_f32(low_gy, high_gy, 1); - const float32x4_t r_gx = vextq_f32(low_gx, high_gx, 2); - const float32x4_t r_gy = vextq_f32(low_gy, high_gy, 2); - - // Gx*Gx - gx2 = vmlaq_f32(gx2, l_gx, l_gx); - gx2 = vmlaq_f32(gx2, m_gx, m_gx); - gx2 = vmlaq_f32(gx2, r_gx, r_gx); - - // Gy*Gy - gy2 = vmlaq_f32(gy2, l_gy, l_gy); - gy2 = vmlaq_f32(gy2, m_gy, m_gy); - gy2 = vmlaq_f32(gy2, r_gy, r_gy); - - // Gx*Gy - gxgy = vmlaq_f32(gxgy, l_gx, l_gy); - gxgy = vmlaq_f32(gxgy, m_gx, m_gy); - gxgy = vmlaq_f32(gxgy, r_gx, r_gy); -} - -inline void harris_score1x5_FLOAT_FLOAT_FLOAT(float32x4_t low_gx, float32x4_t low_gy, float32x4_t high_gx, float32x4_t high_gy, float32x4_t &gx2, float32x4_t &gy2, float32x4_t &gxgy, - float32x4_t norm_factor) -{ - // Normalize - low_gx = vmulq_f32(low_gx, norm_factor); - low_gy = vmulq_f32(low_gy, norm_factor); - high_gx = vmulq_f32(high_gx, norm_factor); - high_gy = vmulq_f32(high_gy, norm_factor); - - // L2 values - float32x4_t gx = low_gx; - float32x4_t gy = low_gy; - - // Accumulate - gx2 = vmlaq_f32(gx2, gx, gx); - gy2 = vmlaq_f32(gy2, gy, gy); - gxgy = vmlaq_f32(gxgy, gx, gy); - - // L1 values - gx = vextq_f32(low_gx, high_gx, 1); - gy = vextq_f32(low_gy, high_gy, 1); - - // Accumulate - gx2 = vmlaq_f32(gx2, gx, gx); - gy2 = vmlaq_f32(gy2, gy, gy); - gxgy = vmlaq_f32(gxgy, gx, gy); - - // M values - gx = vextq_f32(low_gx, high_gx, 2); - gy = vextq_f32(low_gy, high_gy, 2); - - // Accumulate - gx2 = vmlaq_f32(gx2, gx, gx); - gy2 = vmlaq_f32(gy2, gy, gy); - gxgy = vmlaq_f32(gxgy, gx, gy); - - // R1 values - gx = vextq_f32(low_gx, high_gx, 3); - gy = vextq_f32(low_gy, high_gy, 3); - - // Accumulate - gx2 = vmlaq_f32(gx2, gx, gx); - gy2 = vmlaq_f32(gy2, gy, gy); - gxgy = vmlaq_f32(gxgy, gx, gy); - - // R2 values - gx = high_gx; - gy = high_gy; - - // Accumulate - gx2 = vmlaq_f32(gx2, gx, gx); - gy2 = vmlaq_f32(gy2, gy, gy); - gxgy = vmlaq_f32(gxgy, gx, gy); -} - -inline void harris_score1x7_FLOAT_FLOAT_FLOAT(float32x4_t low_gx, float32x4_t low_gy, float32x4_t high_gx, float32x4_t high_gy, float32x4_t high_gx1, float32x4_t high_gy1, float32x4_t &gx2, - float32x4_t &gy2, float32x4_t &gxgy, float32x4_t norm_factor) -{ - // Normalize - low_gx = vmulq_f32(low_gx, norm_factor); - low_gy = vmulq_f32(low_gy, norm_factor); - high_gx = vmulq_f32(high_gx, norm_factor); - high_gy = vmulq_f32(high_gy, norm_factor); - - // L3 values - float32x4_t gx = low_gx; - float32x4_t gy = low_gy; - - // Accumulate - gx2 = vmlaq_f32(gx2, gx, gx); - gy2 = vmlaq_f32(gy2, gy, gy); - gxgy = vmlaq_f32(gxgy, gx, gy); - - // L2 values - gx = vextq_f32(low_gx, high_gx, 1); - gy = vextq_f32(low_gy, high_gy, 1); - - // Accumulate - gx2 = vmlaq_f32(gx2, gx, gx); - gy2 = vmlaq_f32(gy2, gy, gy); - gxgy = vmlaq_f32(gxgy, gx, gy); - - // L1 values - gx = vextq_f32(low_gx, high_gx, 2); - gy = vextq_f32(low_gy, high_gy, 2); - - // Accumulate - gx2 = vmlaq_f32(gx2, gx, gx); - gy2 = vmlaq_f32(gy2, gy, gy); - gxgy = vmlaq_f32(gxgy, gx, gy); - - // M values - gx = vextq_f32(low_gx, high_gx, 3); - gy = vextq_f32(low_gy, high_gy, 3); - - // Accumulate - gx2 = vmlaq_f32(gx2, gx, gx); - gy2 = vmlaq_f32(gy2, gy, gy); - gxgy = vmlaq_f32(gxgy, gx, gy); - - // R1 values - gx = high_gx; - gy = high_gy; - - // Accumulate - gx2 = vmlaq_f32(gx2, gx, gx); - gy2 = vmlaq_f32(gy2, gy, gy); - gxgy = vmlaq_f32(gxgy, gx, gy); - - // Change tmp_low and tmp_high for calculating R2 and R3 values - low_gx = high_gx; - low_gy = high_gy; - high_gx = high_gx1; - high_gy = high_gy1; - - // Normalize - high_gx = vmulq_f32(high_gx, norm_factor); - high_gy = vmulq_f32(high_gy, norm_factor); - - // R2 values - gx = vextq_f32(low_gx, high_gx, 1); - gy = vextq_f32(low_gy, high_gy, 1); - - // Accumulate - gx2 = vmlaq_f32(gx2, gx, gx); - gy2 = vmlaq_f32(gy2, gy, gy); - gxgy = vmlaq_f32(gxgy, gx, gy); - - // R3 values - gx = vextq_f32(low_gx, high_gx, 2); - gy = vextq_f32(low_gy, high_gy, 2); - - // Accumulate - gx2 = vmlaq_f32(gx2, gx, gx); - gy2 = vmlaq_f32(gy2, gy, gy); - gxgy = vmlaq_f32(gxgy, gx, gy); -} - -inline void harris_score3x3_S16_S16_FLOAT(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, int32_t input_stride, - float in_norm_factor, float in_sensitivity, float in_strength_thresh) - -{ - const auto gx_ptr_0 = static_cast(input1_ptr) - 1; - const auto gy_ptr_0 = static_cast(input2_ptr) - 1; - const int16_t *gx_ptr_1 = gx_ptr_0 + 4; - const int16_t *gy_ptr_1 = gy_ptr_0 + 4; - const auto output = static_cast(output_ptr); - - // Gx^2, Gy^2 and Gx*Gy - float32x4x2_t gx2 = - { - { - vdupq_n_f32(0.0f), - vdupq_n_f32(0.0f) - } - }; - float32x4x2_t gy2 = - { - { - vdupq_n_f32(0.0f), - vdupq_n_f32(0.0f) - } - }; - float32x4x2_t gxgy = - { - { - vdupq_n_f32(0.0f), - vdupq_n_f32(0.0f) - } - }; - - // Row0 - int16x8x2_t tmp_gx = - { - { - vld1q_s16(gx_ptr_0 - input_stride), - vld1q_s16(gx_ptr_1 - input_stride) - } - }; - int16x8x2_t tmp_gy = - { - { - vld1q_s16(gy_ptr_0 - input_stride), - vld1q_s16(gy_ptr_1 - input_stride) - } - }; - float32x4_t sensitivity = vdupq_n_f32(in_sensitivity); - float32x4_t norm_factor = vdupq_n_f32(in_norm_factor); - float32x4_t strength_thresh = vdupq_n_f32(in_strength_thresh); - - float32x4_t low_gx = vcvtq_f32_s32(vmovl_s16(vget_low_s16(tmp_gx.val[0]))); - float32x4_t low_gy = vcvtq_f32_s32(vmovl_s16(vget_low_s16(tmp_gy.val[0]))); - float32x4_t high_gx = vcvtq_f32_s32(vmovl_s16(vget_high_s16(tmp_gx.val[0]))); - float32x4_t high_gy = vcvtq_f32_s32(vmovl_s16(vget_high_s16(tmp_gy.val[0]))); - harris_score1x3_FLOAT_FLOAT_FLOAT(low_gx, low_gy, high_gx, high_gy, gx2.val[0], gy2.val[0], gxgy.val[0], norm_factor); - - low_gx = vcvtq_f32_s32(vmovl_s16(vget_low_s16(tmp_gx.val[1]))); - low_gy = vcvtq_f32_s32(vmovl_s16(vget_low_s16(tmp_gy.val[1]))); - high_gx = vcvtq_f32_s32(vmovl_s16(vget_high_s16(tmp_gx.val[1]))); - high_gy = vcvtq_f32_s32(vmovl_s16(vget_high_s16(tmp_gy.val[1]))); - harris_score1x3_FLOAT_FLOAT_FLOAT(low_gx, low_gy, high_gx, high_gy, gx2.val[1], gy2.val[1], gxgy.val[1], norm_factor); - - // Row1 - tmp_gx.val[0] = vld1q_s16(gx_ptr_0); - tmp_gy.val[0] = vld1q_s16(gy_ptr_0); - tmp_gx.val[1] = vld1q_s16(gx_ptr_1); - tmp_gy.val[1] = vld1q_s16(gy_ptr_1); - - low_gx = vcvtq_f32_s32(vmovl_s16(vget_low_s16(tmp_gx.val[0]))); - low_gy = vcvtq_f32_s32(vmovl_s16(vget_low_s16(tmp_gy.val[0]))); - high_gx = vcvtq_f32_s32(vmovl_s16(vget_high_s16(tmp_gx.val[0]))); - high_gy = vcvtq_f32_s32(vmovl_s16(vget_high_s16(tmp_gy.val[0]))); - harris_score1x3_FLOAT_FLOAT_FLOAT(low_gx, low_gy, high_gx, high_gy, gx2.val[0], gy2.val[0], gxgy.val[0], norm_factor); - - low_gx = vcvtq_f32_s32(vmovl_s16(vget_low_s16(tmp_gx.val[1]))); - low_gy = vcvtq_f32_s32(vmovl_s16(vget_low_s16(tmp_gy.val[1]))); - high_gx = vcvtq_f32_s32(vmovl_s16(vget_high_s16(tmp_gx.val[1]))); - high_gy = vcvtq_f32_s32(vmovl_s16(vget_high_s16(tmp_gy.val[1]))); - harris_score1x3_FLOAT_FLOAT_FLOAT(low_gx, low_gy, high_gx, high_gy, gx2.val[1], gy2.val[1], gxgy.val[1], norm_factor); - - // Row2 - tmp_gx.val[0] = vld1q_s16(gx_ptr_0 + input_stride); - tmp_gy.val[0] = vld1q_s16(gy_ptr_0 + input_stride); - tmp_gx.val[1] = vld1q_s16(gx_ptr_1 + input_stride); - tmp_gy.val[1] = vld1q_s16(gy_ptr_1 + input_stride); - - low_gx = vcvtq_f32_s32(vmovl_s16(vget_low_s16(tmp_gx.val[0]))); - low_gy = vcvtq_f32_s32(vmovl_s16(vget_low_s16(tmp_gy.val[0]))); - high_gx = vcvtq_f32_s32(vmovl_s16(vget_high_s16(tmp_gx.val[0]))); - high_gy = vcvtq_f32_s32(vmovl_s16(vget_high_s16(tmp_gy.val[0]))); - harris_score1x3_FLOAT_FLOAT_FLOAT(low_gx, low_gy, high_gx, high_gy, gx2.val[0], gy2.val[0], gxgy.val[0], norm_factor); - - low_gx = vcvtq_f32_s32(vmovl_s16(vget_low_s16(tmp_gx.val[1]))); - low_gy = vcvtq_f32_s32(vmovl_s16(vget_low_s16(tmp_gy.val[1]))); - high_gx = vcvtq_f32_s32(vmovl_s16(vget_high_s16(tmp_gx.val[1]))); - high_gy = vcvtq_f32_s32(vmovl_s16(vget_high_s16(tmp_gy.val[1]))); - harris_score1x3_FLOAT_FLOAT_FLOAT(low_gx, low_gy, high_gx, high_gy, gx2.val[1], gy2.val[1], gxgy.val[1], norm_factor); - - // Calculate harris score - const float32x4x2_t mc = - { - { - harris_score(gx2.val[0], gy2.val[0], gxgy.val[0], sensitivity, strength_thresh), - harris_score(gx2.val[1], gy2.val[1], gxgy.val[1], sensitivity, strength_thresh) - } - }; - - // Store score - vst1q_f32(output + 0, mc.val[0]); - vst1q_f32(output + 4, mc.val[1]); -} - -inline void harris_score3x3_S32_S32_FLOAT(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, int32_t input_stride, - float in_norm_factor, float in_sensitivity, float in_strength_thresh) -{ - auto gx_ptr_0 = static_cast(input1_ptr) - 1; - auto gy_ptr_0 = static_cast(input2_ptr) - 1; - const int32_t *gx_ptr_1 = gx_ptr_0 + 4; - const int32_t *gy_ptr_1 = gy_ptr_0 + 4; - const int32_t *gx_ptr_2 = gx_ptr_0 + 8; - const int32_t *gy_ptr_2 = gy_ptr_0 + 8; - const auto output = static_cast(output_ptr); - float32x4_t sensitivity = vdupq_n_f32(in_sensitivity); - float32x4_t norm_factor = vdupq_n_f32(in_norm_factor); - float32x4_t strength_thresh = vdupq_n_f32(in_strength_thresh); - - // Gx^2, Gy^2 and Gx*Gy - float32x4x2_t gx2 = - { - { - vdupq_n_f32(0.0f), - vdupq_n_f32(0.0f) - } - }; - float32x4x2_t gy2 = - { - { - vdupq_n_f32(0.0f), - vdupq_n_f32(0.0f) - } - }; - float32x4x2_t gxgy = - { - { - vdupq_n_f32(0.0f), - vdupq_n_f32(0.0f) - } - }; - - // Row0 - float32x4_t low_gx = vcvtq_f32_s32(vld1q_s32(gx_ptr_0 - input_stride)); - float32x4_t low_gy = vcvtq_f32_s32(vld1q_s32(gy_ptr_0 - input_stride)); - float32x4_t high_gx = vcvtq_f32_s32(vld1q_s32(gx_ptr_1 - input_stride)); - float32x4_t high_gy = vcvtq_f32_s32(vld1q_s32(gy_ptr_1 - input_stride)); - harris_score1x3_FLOAT_FLOAT_FLOAT(low_gx, low_gy, high_gx, high_gy, gx2.val[0], gy2.val[0], gxgy.val[0], norm_factor); - - low_gx = vcvtq_f32_s32(vld1q_s32(gx_ptr_1 - input_stride)); - low_gy = vcvtq_f32_s32(vld1q_s32(gy_ptr_1 - input_stride)); - high_gx = vcvtq_f32_s32(vld1q_s32(gx_ptr_2 - input_stride)); - high_gy = vcvtq_f32_s32(vld1q_s32(gy_ptr_2 - input_stride)); - harris_score1x3_FLOAT_FLOAT_FLOAT(low_gx, low_gy, high_gx, high_gy, gx2.val[1], gy2.val[1], gxgy.val[1], norm_factor); - - // Row1 - low_gx = vcvtq_f32_s32(vld1q_s32(gx_ptr_0)); - low_gy = vcvtq_f32_s32(vld1q_s32(gy_ptr_0)); - high_gx = vcvtq_f32_s32(vld1q_s32(gx_ptr_1)); - high_gy = vcvtq_f32_s32(vld1q_s32(gy_ptr_1)); - harris_score1x3_FLOAT_FLOAT_FLOAT(low_gx, low_gy, high_gx, high_gy, gx2.val[0], gy2.val[0], gxgy.val[0], norm_factor); - - low_gx = vcvtq_f32_s32(vld1q_s32(gx_ptr_1)); - low_gy = vcvtq_f32_s32(vld1q_s32(gy_ptr_1)); - high_gx = vcvtq_f32_s32(vld1q_s32(gx_ptr_2)); - high_gy = vcvtq_f32_s32(vld1q_s32(gy_ptr_2)); - harris_score1x3_FLOAT_FLOAT_FLOAT(low_gx, low_gy, high_gx, high_gy, gx2.val[1], gy2.val[1], gxgy.val[1], norm_factor); - - // Row2 - low_gx = vcvtq_f32_s32(vld1q_s32(gx_ptr_0 + input_stride)); - low_gy = vcvtq_f32_s32(vld1q_s32(gy_ptr_0 + input_stride)); - high_gx = vcvtq_f32_s32(vld1q_s32(gx_ptr_1 + input_stride)); - high_gy = vcvtq_f32_s32(vld1q_s32(gy_ptr_1 + input_stride)); - harris_score1x3_FLOAT_FLOAT_FLOAT(low_gx, low_gy, high_gx, high_gy, gx2.val[0], gy2.val[0], gxgy.val[0], norm_factor); - - low_gx = vcvtq_f32_s32(vld1q_s32(gx_ptr_1 + input_stride)); - low_gy = vcvtq_f32_s32(vld1q_s32(gy_ptr_1 + input_stride)); - high_gx = vcvtq_f32_s32(vld1q_s32(gx_ptr_2 + input_stride)); - high_gy = vcvtq_f32_s32(vld1q_s32(gy_ptr_2 + input_stride)); - harris_score1x3_FLOAT_FLOAT_FLOAT(low_gx, low_gy, high_gx, high_gy, gx2.val[1], gy2.val[1], gxgy.val[1], norm_factor); - - // Calculate harris score - const float32x4x2_t mc = - { - { - harris_score(gx2.val[0], gy2.val[0], gxgy.val[0], sensitivity, strength_thresh), - harris_score(gx2.val[1], gy2.val[1], gxgy.val[1], sensitivity, strength_thresh) - } - }; - - // Store score - vst1q_f32(output + 0, mc.val[0]); - vst1q_f32(output + 4, mc.val[1]); -} - -inline void harris_score5x5_S16_S16_FLOAT(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, int32_t input_stride, - float in_norm_factor, float in_sensitivity, float in_strength_thresh) -{ - auto gx_ptr_0 = static_cast(input1_ptr) - 2 - 2 * input_stride; - auto gy_ptr_0 = static_cast(input2_ptr) - 2 - 2 * input_stride; - const int16_t *gx_ptr_1 = gx_ptr_0 + 4; - const int16_t *gy_ptr_1 = gy_ptr_0 + 4; - const auto output = static_cast(output_ptr); - - // Gx^2, Gy^2 and Gx*Gy - float32x4x2_t gx2 = - { - { - vdupq_n_f32(0.0f), - vdupq_n_f32(0.0f) - } - }; - float32x4x2_t gy2 = - { - { - vdupq_n_f32(0.0f), - vdupq_n_f32(0.0f) - } - }; - float32x4x2_t gxgy = - { - { - vdupq_n_f32(0.0f), - vdupq_n_f32(0.0f) - } - }; - float32x4_t sensitivity = vdupq_n_f32(in_sensitivity); - float32x4_t norm_factor = vdupq_n_f32(in_norm_factor); - float32x4_t strength_thresh = vdupq_n_f32(in_strength_thresh); - - for(int i = 0; i < 5; ++i) - { - const int16x8x2_t tmp_gx = - { - { - vld1q_s16(gx_ptr_0), - vld1q_s16(gx_ptr_1) - } - }; - const int16x8x2_t tmp_gy = - { - { - vld1q_s16(gy_ptr_0), - vld1q_s16(gy_ptr_1) - } - }; - - float32x4_t low_gx = vcvtq_f32_s32(vmovl_s16(vget_low_s16(tmp_gx.val[0]))); - float32x4_t low_gy = vcvtq_f32_s32(vmovl_s16(vget_low_s16(tmp_gy.val[0]))); - float32x4_t high_gx = vcvtq_f32_s32(vmovl_s16(vget_high_s16(tmp_gx.val[0]))); - float32x4_t high_gy = vcvtq_f32_s32(vmovl_s16(vget_high_s16(tmp_gy.val[0]))); - harris_score1x5_FLOAT_FLOAT_FLOAT(low_gx, low_gy, high_gx, high_gy, gx2.val[0], gy2.val[0], gxgy.val[0], norm_factor); - - low_gx = vcvtq_f32_s32(vmovl_s16(vget_low_s16(tmp_gx.val[1]))); - low_gy = vcvtq_f32_s32(vmovl_s16(vget_low_s16(tmp_gy.val[1]))); - high_gx = vcvtq_f32_s32(vmovl_s16(vget_high_s16(tmp_gx.val[1]))); - high_gy = vcvtq_f32_s32(vmovl_s16(vget_high_s16(tmp_gy.val[1]))); - harris_score1x5_FLOAT_FLOAT_FLOAT(low_gx, low_gy, high_gx, high_gy, gx2.val[1], gy2.val[1], gxgy.val[1], norm_factor); - - // Update gx and gy pointer - gx_ptr_0 += input_stride; - gy_ptr_0 += input_stride; - gx_ptr_1 += input_stride; - gy_ptr_1 += input_stride; - } - - // Calculate harris score - const float32x4x2_t mc = - { - { - harris_score(gx2.val[0], gy2.val[0], gxgy.val[0], sensitivity, strength_thresh), - harris_score(gx2.val[1], gy2.val[1], gxgy.val[1], sensitivity, strength_thresh) - } - }; - - // Store score - vst1q_f32(output + 0, mc.val[0]); - vst1q_f32(output + 4, mc.val[1]); -} - -inline void harris_score5x5_S32_S32_FLOAT(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, int32_t input_stride, - float in_norm_factor, float in_sensitivity, float in_strength_thresh) - -{ - auto gx_ptr_0 = static_cast(input1_ptr) - 2 - 2 * input_stride; - auto gy_ptr_0 = static_cast(input2_ptr) - 2 - 2 * input_stride; - const int32_t *gx_ptr_1 = gx_ptr_0 + 4; - const int32_t *gy_ptr_1 = gy_ptr_0 + 4; - const int32_t *gx_ptr_2 = gx_ptr_0 + 8; - const int32_t *gy_ptr_2 = gy_ptr_0 + 8; - const auto output = static_cast(output_ptr); - - // Gx^2, Gy^2 and Gx*Gy - float32x4x2_t gx2 = - { - { - vdupq_n_f32(0.0f), - vdupq_n_f32(0.0f) - } - }; - float32x4x2_t gy2 = - { - { - vdupq_n_f32(0.0f), - vdupq_n_f32(0.0f) - } - }; - float32x4x2_t gxgy = - { - { - vdupq_n_f32(0.0f), - vdupq_n_f32(0.0f) - } - }; - float32x4_t sensitivity = vdupq_n_f32(in_sensitivity); - float32x4_t norm_factor = vdupq_n_f32(in_norm_factor); - float32x4_t strength_thresh = vdupq_n_f32(in_strength_thresh); - - for(int i = 0; i < 5; ++i) - { - const float32x4_t low_gx_0 = vcvtq_f32_s32(vld1q_s32(gx_ptr_0)); - const float32x4_t low_gy_0 = vcvtq_f32_s32(vld1q_s32(gy_ptr_0)); - const float32x4_t high_gx_0 = vcvtq_f32_s32(vld1q_s32(gx_ptr_1)); - const float32x4_t high_gy_0 = vcvtq_f32_s32(vld1q_s32(gy_ptr_1)); - harris_score1x5_FLOAT_FLOAT_FLOAT(low_gx_0, low_gy_0, high_gx_0, high_gy_0, gx2.val[0], gy2.val[0], gxgy.val[0], norm_factor); - - const float32x4_t low_gx_1 = vcvtq_f32_s32(vld1q_s32(gx_ptr_1)); - const float32x4_t low_gy_1 = vcvtq_f32_s32(vld1q_s32(gy_ptr_1)); - const float32x4_t high_gx_1 = vcvtq_f32_s32(vld1q_s32(gx_ptr_2)); - const float32x4_t high_gy_1 = vcvtq_f32_s32(vld1q_s32(gy_ptr_2)); - harris_score1x5_FLOAT_FLOAT_FLOAT(low_gx_1, low_gy_1, high_gx_1, high_gy_1, gx2.val[1], gy2.val[1], gxgy.val[1], norm_factor); - - // Update gx and gy pointer - gx_ptr_0 += input_stride; - gy_ptr_0 += input_stride; - gx_ptr_1 += input_stride; - gy_ptr_1 += input_stride; - gx_ptr_2 += input_stride; - gy_ptr_2 += input_stride; - } - - // Calculate harris score - const float32x4x2_t mc = - { - { - harris_score(gx2.val[0], gy2.val[0], gxgy.val[0], sensitivity, strength_thresh), - harris_score(gx2.val[1], gy2.val[1], gxgy.val[1], sensitivity, strength_thresh) - } - }; - - // Store score - vst1q_f32(output + 0, mc.val[0]); - vst1q_f32(output + 4, mc.val[1]); -} - -inline void harris_score7x7_S16_S16_FLOAT(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, int32_t input_stride, - float in_norm_factor, float in_sensitivity, float in_strength_thresh) -{ - auto gx_ptr_0 = static_cast(input1_ptr) - 3 - 3 * input_stride; - auto gy_ptr_0 = static_cast(input2_ptr) - 3 - 3 * input_stride; - const int16_t *gx_ptr_1 = gx_ptr_0 + 8; - const int16_t *gy_ptr_1 = gy_ptr_0 + 8; - const auto output = static_cast(output_ptr); - - // Gx^2, Gy^2 and Gx*Gy - float32x4_t gx2 = vdupq_n_f32(0.0f); - float32x4_t gy2 = vdupq_n_f32(0.0f); - float32x4_t gxgy = vdupq_n_f32(0.0f); - float32x4_t sensitivity = vdupq_n_f32(in_sensitivity); - float32x4_t norm_factor = vdupq_n_f32(in_norm_factor); - float32x4_t strength_thresh = vdupq_n_f32(in_strength_thresh); - - for(int i = 0; i < 7; ++i) - { - const int16x8_t tmp0_gx = vld1q_s16(gx_ptr_0); - const int16x8_t tmp0_gy = vld1q_s16(gy_ptr_0); - const int16x4_t tmp1_gx = vld1_s16(gx_ptr_1); - const int16x4_t tmp1_gy = vld1_s16(gy_ptr_1); - - float32x4_t low_gx = vcvtq_f32_s32(vmovl_s16(vget_low_s16(tmp0_gx))); - float32x4_t low_gy = vcvtq_f32_s32(vmovl_s16(vget_low_s16(tmp0_gy))); - float32x4_t high_gx = vcvtq_f32_s32(vmovl_s16(vget_high_s16(tmp0_gx))); - float32x4_t high_gy = vcvtq_f32_s32(vmovl_s16(vget_high_s16(tmp0_gy))); - float32x4_t high_gx1 = vcvtq_f32_s32(vmovl_s16(tmp1_gx)); - float32x4_t high_gy1 = vcvtq_f32_s32(vmovl_s16(tmp1_gy)); - harris_score1x7_FLOAT_FLOAT_FLOAT(low_gx, low_gy, high_gx, high_gy, high_gx1, high_gy1, gx2, gy2, gxgy, norm_factor); - - // Update gx and gy pointer - gx_ptr_0 += input_stride; - gy_ptr_0 += input_stride; - gx_ptr_1 += input_stride; - gy_ptr_1 += input_stride; - } - - // Calculate harris score - const float32x4_t mc = harris_score(gx2, gy2, gxgy, sensitivity, strength_thresh); - - // Store score - vst1q_f32(output, mc); -} - -inline void harris_score7x7_S32_S32_FLOAT(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, int32_t input_stride, - float in_norm_factor, float in_sensitivity, float in_strength_thresh) -{ - auto gx_ptr_0 = static_cast(input1_ptr) - 3 - 3 * input_stride; - auto gy_ptr_0 = static_cast(input2_ptr) - 3 - 3 * input_stride; - const int32_t *gx_ptr_1 = gx_ptr_0 + 4; - const int32_t *gy_ptr_1 = gy_ptr_0 + 4; - const int32_t *gx_ptr_2 = gx_ptr_1 + 4; - const int32_t *gy_ptr_2 = gy_ptr_1 + 4; - const auto output = static_cast(output_ptr); - - // Gx^2, Gy^2 and Gx*Gy - float32x4_t gx2 = vdupq_n_f32(0.0f); - float32x4_t gy2 = vdupq_n_f32(0.0f); - float32x4_t gxgy = vdupq_n_f32(0.0f); - float32x4_t sensitivity = vdupq_n_f32(in_sensitivity); - float32x4_t norm_factor = vdupq_n_f32(in_norm_factor); - float32x4_t strength_thresh = vdupq_n_f32(in_strength_thresh); - - for(int i = 0; i < 7; ++i) - { - const float32x4_t low_gx = vcvtq_f32_s32(vld1q_s32(gx_ptr_0)); - const float32x4_t low_gy = vcvtq_f32_s32(vld1q_s32(gy_ptr_0)); - const float32x4_t high_gx = vcvtq_f32_s32(vld1q_s32(gx_ptr_1)); - const float32x4_t high_gy = vcvtq_f32_s32(vld1q_s32(gy_ptr_1)); - const float32x4_t high_gx1 = vcvtq_f32_s32(vld1q_s32(gx_ptr_2)); - const float32x4_t high_gy1 = vcvtq_f32_s32(vld1q_s32(gy_ptr_2)); - harris_score1x7_FLOAT_FLOAT_FLOAT(low_gx, low_gy, high_gx, high_gy, high_gx1, high_gy1, gx2, gy2, gxgy, norm_factor); - - // Update gx and gy pointer - gx_ptr_0 += input_stride; - gy_ptr_0 += input_stride; - gx_ptr_1 += input_stride; - gy_ptr_1 += input_stride; - gx_ptr_2 += input_stride; - gy_ptr_2 += input_stride; - } - - // Calculate harris score - const float32x4_t mc = harris_score(gx2, gy2, gxgy, sensitivity, strength_thresh); - - // Store score - vst1q_f32(output, mc); -} - -} // namespace - -INEHarrisScoreKernel::INEHarrisScoreKernel() - : _input1(nullptr), _input2(nullptr), _output(nullptr), _sensitivity(0.0f), _strength_thresh(0.0f), _norm_factor(0.0f), _border_size() -{ -} - -template -NEHarrisScoreKernel::NEHarrisScoreKernel() - : INEHarrisScoreKernel(), _func(nullptr) -{ -} - -template -void NEHarrisScoreKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - ARM_COMPUTE_ERROR_ON(_func == nullptr); - - Iterator input1(_input1, window); - Iterator input2(_input2, window); - Iterator output(_output, window); - - const size_t input_stride = _input1->info()->strides_in_bytes()[1] / element_size_from_data_type(_input1->info()->data_type()); - - execute_window_loop(window, [&](const Coordinates &) - { - (*_func)(input1.ptr(), input2.ptr(), output.ptr(), input_stride, _norm_factor, _sensitivity, _strength_thresh); - }, - input1, input2, output); -} - -template -BorderSize NEHarrisScoreKernel::border_size() const -{ - return _border_size; -} - -template -void NEHarrisScoreKernel::configure(const IImage *input1, const IImage *input2, IImage *output, float norm_factor, float strength_thresh, float sensitivity, - bool border_undefined) -{ - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input1); - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input2); - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(output); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::S16, DataType::S32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::S16, DataType::S32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F32); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2); - ARM_COMPUTE_ERROR_ON(0.0f == norm_factor); - - _input1 = input1; - _input2 = input2; - _output = output; - _sensitivity = sensitivity; - _strength_thresh = strength_thresh; - _norm_factor = norm_factor; - _border_size = BorderSize(block_size / 2); - - if(input1->info()->data_type() == DataType::S16) - { - switch(block_size) - { - case 3: - _func = &harris_score3x3_S16_S16_FLOAT; - break; - case 5: - _func = &harris_score5x5_S16_S16_FLOAT; - break; - case 7: - _func = &harris_score7x7_S16_S16_FLOAT; - break; - default: - ARM_COMPUTE_ERROR("Invalid block size"); - break; - } - } - else - { - switch(block_size) - { - case 3: - _func = &harris_score3x3_S32_S32_FLOAT; - break; - case 5: - _func = &harris_score5x5_S32_S32_FLOAT; - break; - case 7: - _func = &harris_score7x7_S32_S32_FLOAT; - break; - default: - ARM_COMPUTE_ERROR("Invalid block size"); - break; - } - } - - ARM_COMPUTE_ERROR_ON(nullptr == _func); - - constexpr unsigned int num_elems_processed_per_iteration = block_size != 7 ? 8 : 4; - constexpr unsigned int num_elems_read_per_iteration = block_size != 7 ? 16 : 12; - constexpr unsigned int num_elems_written_per_iteration = block_size != 7 ? 8 : 4; - constexpr unsigned int num_rows_read_per_iteration = block_size; - - // Configure kernel window - Window win = calculate_max_window(*input1->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size()); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration); - - update_window_and_padding(win, - AccessWindowRectangle(input1->info(), -_border_size.left, -_border_size.top, num_elems_read_per_iteration, num_rows_read_per_iteration), - AccessWindowRectangle(input2->info(), -_border_size.left, -_border_size.top, num_elems_read_per_iteration, num_rows_read_per_iteration), - output_access); - - ValidRegion valid_region = intersect_valid_regions(input1->info()->valid_region(), - input2->info()->valid_region()); - - output_access.set_valid_region(win, valid_region, border_undefined, border_size()); - - INEKernel::configure(win); -} diff --git a/src/core/NEON/kernels/NEHarrisCornersKernel.h b/src/core/NEON/kernels/NEHarrisCornersKernel.h deleted file mode 100644 index 85f80878cc..0000000000 --- a/src/core/NEON/kernels/NEHarrisCornersKernel.h +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright (c) 2016-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEHARRISCORNERSKERNEL_H -#define ARM_COMPUTE_NEHARRISCORNERSKERNEL_H - -#include "arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h" -#include "arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h" -#include "arm_compute/core/IArray.h" -#include "src/core/NEON/INEKernel.h" - -#include - -namespace arm_compute -{ -class ITensor; -using IImage = ITensor; - -/** Common interface for all Harris Score kernels */ -class INEHarrisScoreKernel : public INEKernel -{ -public: - /** Default constructor */ - INEHarrisScoreKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - INEHarrisScoreKernel(const INEHarrisScoreKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - INEHarrisScoreKernel &operator=(const INEHarrisScoreKernel &) = delete; - /** Allow instances of this class to be moved */ - INEHarrisScoreKernel(INEHarrisScoreKernel &&) = default; - /** Allow instances of this class to be moved */ - INEHarrisScoreKernel &operator=(INEHarrisScoreKernel &&) = default; - /** Default destructor */ - ~INEHarrisScoreKernel() = default; - -public: - /** Setup the kernel parameters - * - * @param[in] input1 Source image (gradient X). Data types supported: S16/S32 - * @param[in] input2 Source image (gradient Y). Data types supported: same as @ input1 - * @param[out] output Destination image (harris score). Data types supported: F32 - * @param[in] norm_factor Normalization factor to use accordingly with the gradient size (Must be different from 0) - * @param[in] strength_thresh Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel). - * @param[in] sensitivity Sensitivity threshold k from the Harris-Stephens equation - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - virtual void configure(const IImage *input1, const IImage *input2, IImage *output, float norm_factor, float strength_thresh, float sensitivity, bool border_undefined) = 0; - -protected: - const IImage *_input1; /**< Source image - Gx component */ - const IImage *_input2; /**< Source image - Gy component */ - IImage *_output; /**< Source image - Harris score */ - float _sensitivity; /**< Sensitivity value */ - float _strength_thresh; /**< Threshold value */ - float _norm_factor; /**< Normalization factor */ - BorderSize _border_size; /**< Border size */ -}; - -/** Template Neon kernel to perform Harris Score. - * The implementation supports 3, 5, and 7 for the block_size - */ -template -class NEHarrisScoreKernel : public INEHarrisScoreKernel -{ -public: - const char *name() const override - { - return "NEHarrisScoreKernel"; - } - /** Default constructor */ - NEHarrisScoreKernel(); - // Inherited methods overridden: - void configure(const IImage *input1, const IImage *input2, IImage *output, float norm_factor, float strength_thresh, float sensitivity, bool border_undefined) override; - BorderSize border_size() const override; - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Common signature for all the specialised harris score functions */ - using HarrisScoreFunction = void(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, int32_t input_stride, - float norm_factor, float sensitivity, float strength_thresh); - /** Harris Score function to use for the particular image types passed to configure() */ - HarrisScoreFunction *_func; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEHARRISCORNERSKERNEL_H */ diff --git a/src/core/NEON/kernels/NEHistogramKernel.cpp b/src/core/NEON/kernels/NEHistogramKernel.cpp deleted file mode 100644 index eddc3b29ab..0000000000 --- a/src/core/NEON/kernels/NEHistogramKernel.cpp +++ /dev/null @@ -1,249 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/NEON/kernels/NEHistogramKernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/IDistribution1D.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Window.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" - -#include -#include -#include - -namespace arm_compute -{ -class Coordinates; - -inline void NEHistogramKernel::merge_histogram(uint32_t *global_hist, const uint32_t *local_hist, size_t bins) -{ - arm_compute::lock_guard lock(_hist_mtx); - - const unsigned int v_end = (bins / 4) * 4; - - for(unsigned int b = 0; b < v_end; b += 4) - { - const uint32x4_t tmp_global = vld1q_u32(global_hist + b); - const uint32x4_t tmp_local = vld1q_u32(local_hist + b); - vst1q_u32(global_hist + b, vaddq_u32(tmp_global, tmp_local)); - } - - for(unsigned int b = v_end; b < bins; ++b) - { - global_hist[b] += local_hist[b]; - } -} - -NEHistogramKernel::NEHistogramKernel() - : _func(nullptr), _input(nullptr), _output(nullptr), _local_hist(nullptr), _window_lut(nullptr), _hist_mtx() -{ -} - -void NEHistogramKernel::histogram_U8(Window win, const ThreadInfo &info) -{ - ARM_COMPUTE_ERROR_ON(_output->buffer() == nullptr); - - const size_t bins = _output->num_bins(); - const int32_t offset = _output->offset(); - const uint32_t offrange = offset + _output->range(); - const uint32_t *const w_lut = _window_lut; - uint32_t *const local_hist = _local_hist + info.thread_id * bins; - - // Clear local_histogram - std::fill_n(local_hist, bins, 0); - - auto update_local_hist = [&](uint8_t p) - { - if(offset <= p && p < offrange) - { - ++local_hist[w_lut[p]]; - } - }; - - const int x_start = win.x().start(); - const int x_end = win.x().end(); - - // Handle X dimension manually to split into two loops - // First one will use vector operations, second one processes the left over - // pixels - win.set(Window::DimX, Window::Dimension(0, 1, 1)); - - Iterator input(_input, win); - - // Calculate local histogram - execute_window_loop(win, [&](const Coordinates &) - { - int x = x_start; - - // Vector loop - for(; x <= x_end - 8; x += 8) - { - const uint8x8_t pixels = vld1_u8(input.ptr() + x); - - update_local_hist(vget_lane_u8(pixels, 0)); - update_local_hist(vget_lane_u8(pixels, 1)); - update_local_hist(vget_lane_u8(pixels, 2)); - update_local_hist(vget_lane_u8(pixels, 3)); - update_local_hist(vget_lane_u8(pixels, 4)); - update_local_hist(vget_lane_u8(pixels, 5)); - update_local_hist(vget_lane_u8(pixels, 6)); - update_local_hist(vget_lane_u8(pixels, 7)); - } - - // Process leftover pixels - for(; x < x_end; ++x) - { - update_local_hist(input.ptr()[x]); - } - }, - input); - - // Merge histograms - merge_histogram(_output->buffer(), local_hist, bins); -} - -void NEHistogramKernel::histogram_fixed_U8(Window win, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON(_output->buffer() == nullptr); - - std::array local_hist{ { 0 } }; - - const int x_start = win.x().start(); - const int x_end = win.x().end(); - - // Handle X dimension manually to split into two loops - // First one will use vector operations, second one processes the left over - // pixels - win.set(Window::DimX, Window::Dimension(0, 1, 1)); - - Iterator input(_input, win); - - // Calculate local histogram - execute_window_loop(win, [&](const Coordinates &) - { - int x = x_start; - - // Vector loop - for(; x <= x_end - 8; x += 8) - { - const uint8x8_t pixels = vld1_u8(input.ptr() + x); - - ++local_hist[vget_lane_u8(pixels, 0)]; - ++local_hist[vget_lane_u8(pixels, 1)]; - ++local_hist[vget_lane_u8(pixels, 2)]; - ++local_hist[vget_lane_u8(pixels, 3)]; - ++local_hist[vget_lane_u8(pixels, 4)]; - ++local_hist[vget_lane_u8(pixels, 5)]; - ++local_hist[vget_lane_u8(pixels, 6)]; - ++local_hist[vget_lane_u8(pixels, 7)]; - } - - // Process leftover pixels - for(; x < x_end; ++x) - { - ++local_hist[input.ptr()[x]]; - } - }, - input); - - // Merge histograms - merge_histogram(_output->buffer(), local_hist.data(), _max_range_size); -} - -void NEHistogramKernel::calculate_window_lut() const -{ - const int32_t offset = _output->offset(); - const size_t bins = _output->num_bins(); - const uint32_t range = _output->range(); - - std::fill_n(_window_lut, offset, 0); - - for(unsigned int p = offset; p < _max_range_size; ++p) - { - _window_lut[p] = ((p - offset) * bins) / range; - } -} - -void NEHistogramKernel::configure(const IImage *input, IDistribution1D *output, uint32_t *local_hist, uint32_t *window_lut) -{ - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON(nullptr == output); - ARM_COMPUTE_ERROR_ON(nullptr == local_hist); - ARM_COMPUTE_ERROR_ON(nullptr == window_lut); - - _input = input; - _output = output; - _local_hist = local_hist; - _window_lut = window_lut; - - //Check offset - ARM_COMPUTE_ERROR_ON_MSG(0 > _output->offset() || _output->offset() > static_cast(_max_range_size), "Offset is larger than the image value range."); - - //Check range - ARM_COMPUTE_ERROR_ON_MSG(static_cast(_output->range()) > static_cast(_max_range_size) /* max range */, "Range larger than the image value range."); - - // Calculate LUT - calculate_window_lut(); - - // Set appropriate function - _func = &NEHistogramKernel::histogram_U8; - - Window win = calculate_max_window(*input->info(), Steps()); - - INEKernel::configure(win); -} - -void NEHistogramKernel::configure(const IImage *input, IDistribution1D *output) -{ - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON(nullptr == output); - - _input = input; - _output = output; - - // Set appropriate function - _func = &NEHistogramKernel::histogram_fixed_U8; - - Window win = calculate_max_window(*input->info(), Steps()); - - INEKernel::configure(win); -} - -void NEHistogramKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - ARM_COMPUTE_ERROR_ON(_func == nullptr); - - (this->*_func)(window, info); -} -} // namespace arm_compute diff --git a/src/core/NEON/kernels/NEHistogramKernel.h b/src/core/NEON/kernels/NEHistogramKernel.h deleted file mode 100644 index e14519ce25..0000000000 --- a/src/core/NEON/kernels/NEHistogramKernel.h +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEHISTOGRAMKERNEL_H -#define ARM_COMPUTE_NEHISTOGRAMKERNEL_H - -#include "src/core/NEON/INEKernel.h" -#include "support/Mutex.h" - -#include -#include - -namespace arm_compute -{ -class IDistribution1D; -class ITensor; -using IImage = ITensor; - -/** Interface for the histogram kernel */ -class NEHistogramKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEHistogramKernel"; - } - /** Default constructor */ - NEHistogramKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHistogramKernel(const NEHistogramKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHistogramKernel &operator=(const NEHistogramKernel &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEHistogramKernel(NEHistogramKernel &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEHistogramKernel &operator=(NEHistogramKernel &&) = delete; - /** Default destructor */ - ~NEHistogramKernel() = default; - - /** Set the input image and the distribution output. - * - * @param[in] input Source image. Data type supported: U8. - * @param[out] output Destination distribution. - * @param[in,out] local_hist Array that the threads use to save their local histograms. - * It's size should be equal to (number_of_threads * num_bins), - * and the Window::thread_id() is used to determine the part of the array - * used by each thread. - * @param[out] window_lut LUT with pre-calculated possible window values. - * The size of the LUT should be equal to max_range_size and it will be filled - * during the configure stage, while it re-used in every run, therefore can be - * safely shared among threads. - */ - void configure(const IImage *input, IDistribution1D *output, uint32_t *local_hist, uint32_t *window_lut); - /** Set the input image and the distribution output. - * - * @note Used for histogram of fixed size equal to 256 - * - * @param[in] input Source image. Data type supported: U8. - * @param[out] output Destination distribution which must be of 256 bins.. - */ - void configure(const IImage *input, IDistribution1D *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Function to merge multiple partial histograms. - * - * @param[out] global_hist Pointer to the final histogram. - * @param[in] local_hist Pointer to the partial histograms. - * @param[in] bins Number of bins. - */ - void merge_histogram(uint32_t *global_hist, const uint32_t *local_hist, size_t bins); - /** Function to merge multiple minimum values of partial histograms. - * - * @param[out] global_min Pointer to the global min value. - * @param[in] local_min Local min value. - */ - void merge_min(uint8_t *global_min, const uint8_t &local_min); - /** Function to perform histogram on the given window - * - * @param[in] win Region on which to execute the kernel - * @param[in] info Info about the executing thread - */ - void histogram_U8(Window win, const ThreadInfo &info); - /** Function to perform histogram on the given window where histogram is - * of fixed size 256 without ranges and offsets. - * - * @param[in] win Region on which to execute the kernel - * @param[in] info Info about the executing thread - */ - void histogram_fixed_U8(Window win, const ThreadInfo &info); - /** Pre-calculate the pixel windowing for every possible pixel - * - * Calculate (V - offset) * numBins / range where V is every possible pixel value. - * - * @note We currently support U8 image thus possible pixel values are between 0 and 255 - */ - void calculate_window_lut() const; - /** Common signature for all the specialised Histogram functions - * - * @param[in] window Region on which to execute the kernel. - */ - using HistogramFunctionPtr = void (NEHistogramKernel::*)(Window window, const ThreadInfo &info); - - HistogramFunctionPtr _func; ///< Histogram function to use for the particular image types passed to configure() - const IImage *_input; - IDistribution1D *_output; - uint32_t *_local_hist; - uint32_t *_window_lut; - arm_compute::Mutex _hist_mtx; - static constexpr unsigned int _max_range_size{ 256 }; ///< 256 possible pixel values as we handle only U8 images -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEHISTOGRAMKERNEL_H */ diff --git a/src/core/NEON/kernels/NEIntegralImageKernel.cpp b/src/core/NEON/kernels/NEIntegralImageKernel.cpp deleted file mode 100644 index 6ee97eea30..0000000000 --- a/src/core/NEON/kernels/NEIntegralImageKernel.cpp +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/NEON/kernels/NEIntegralImageKernel.h" - -#include "arm_compute/core/Coordinates.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" - -#include -#include -#include - -using namespace arm_compute; - -void NEIntegralImageKernel::configure(const ITensor *input, ITensor *output) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U32); - - _input = input; - _output = output; - - constexpr unsigned int num_elems_processed_per_iteration = 16; - - // Configure kernel window - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); - // The kernel is effectively reading 17 values from -1 as it loads 16 - // starting at -1 and also 16 starting at 0 - AccessWindowRectangle output_read_access(output->info(), -1, -1, num_elems_processed_per_iteration + 1, 1); - AccessWindowHorizontal output_write_access(output->info(), 0, num_elems_processed_per_iteration); - - update_window_and_padding(win, - AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration), - output_read_access, output_write_access); - - output_write_access.set_valid_region(win, input->info()->valid_region()); - - IKernel::configure(win); -} - -BorderSize NEIntegralImageKernel::border_size() const -{ - return BorderSize{ 1, 0, 0, 1 }; -} - -bool NEIntegralImageKernel::is_parallelisable() const -{ - return false; -} - -void NEIntegralImageKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INESimpleKernel::window(), window); - - Iterator input(_input, window); - Iterator output(_output, window); - - const auto output_top_left = reinterpret_cast(_output->ptr_to_element(Coordinates(-1, -1))); - const auto output_top_mid = reinterpret_cast(_output->ptr_to_element(Coordinates(0, -1))); - - execute_window_loop(window, [&](const Coordinates &) - { - const uint8x16_t input_pixels = vld1q_u8(input.ptr()); - - const uint16x8x2_t tmp = - { - { - vmovl_u8(vget_low_u8(input_pixels)), - vmovl_u8(vget_high_u8(input_pixels)) - } - }; - - uint32x4x4_t pixels = - { - { - vmovl_u16(vget_low_u16(tmp.val[0])), - vmovl_u16(vget_high_u16(tmp.val[0])), - vmovl_u16(vget_low_u16(tmp.val[1])), - vmovl_u16(vget_high_u16(tmp.val[1])) - } - }; - - // Divide by four as pointer is now uint32 instead of uint8! - const size_t off = output.offset() / 4; - - // Add top mid pixel values - const uint32_t *const top_mid_ptr = output_top_mid + off; - - pixels.val[0] = vaddq_u32(vld1q_u32(top_mid_ptr), pixels.val[0]); - pixels.val[1] = vaddq_u32(vld1q_u32(top_mid_ptr + 4), pixels.val[1]); - pixels.val[2] = vaddq_u32(vld1q_u32(top_mid_ptr + 8), pixels.val[2]); - pixels.val[3] = vaddq_u32(vld1q_u32(top_mid_ptr + 12), pixels.val[3]); - - // Subtract top left diagonal values - const auto outptr = reinterpret_cast(output.ptr()); - const uint32_t *const top_left_ptr = output_top_left + off; - - pixels.val[0] = vsubq_u32(pixels.val[0], vld1q_u32(top_left_ptr)); - vst1q_u32(outptr, pixels.val[0]); - - pixels.val[1] = vsubq_u32(pixels.val[1], vld1q_u32(top_left_ptr + 4)); - vst1q_u32(outptr + 4, pixels.val[1]); - - pixels.val[2] = vsubq_u32(pixels.val[2], vld1q_u32(top_left_ptr + 8)); - vst1q_u32(outptr + 8, pixels.val[2]); - - pixels.val[3] = vsubq_u32(pixels.val[3], vld1q_u32(top_left_ptr + 12)); - vst1q_u32(outptr + 12, pixels.val[3]); - - // Perform prefix summation - for(auto i = 0; i < 16; ++i) - { - outptr[i] += outptr[i - 1]; - } - }, - input, output); -} diff --git a/src/core/NEON/kernels/NEIntegralImageKernel.h b/src/core/NEON/kernels/NEIntegralImageKernel.h deleted file mode 100644 index 8d92504317..0000000000 --- a/src/core/NEON/kernels/NEIntegralImageKernel.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEINTEGRALIMAGEKERNEL_H -#define ARM_COMPUTE_NEINTEGRALIMAGEKERNEL_H - -#include "src/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Kernel to perform an image integral on an image */ -class NEIntegralImageKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEIntegralImageKernel"; - } - /** Default constructor */ - NEIntegralImageKernel() = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEIntegralImageKernel(const NEIntegralImageKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEIntegralImageKernel &operator=(const NEIntegralImageKernel &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEIntegralImageKernel(NEIntegralImageKernel &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEIntegralImageKernel &operator=(NEIntegralImageKernel &&) = delete; - /** Default destructor */ - ~NEIntegralImageKernel() = default; - /** Set the source, destination and border mode of the kernel - * - * @param[in] input Source tensor. Data type supported: U8 - * @param[out] output Destination tensor. Data type supported: U32 - */ - void configure(const ITensor *input, ITensor *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - bool is_parallelisable() const override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEINTEGRALIMAGEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp b/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp deleted file mode 100644 index 205f67823d..0000000000 --- a/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp +++ /dev/null @@ -1,490 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/NEON/kernels/NEMagnitudePhaseKernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/IAccessWindow.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/Validate.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" - -#include -#include - -using namespace arm_compute; - -namespace arm_compute -{ -class Coordinates; -} // namespace arm_compute - -namespace -{ -// Defines for computing atan2 -constexpr float SCALE_FACTOR = 0.7111111111111111f; -constexpr float PI = 3.141592653589793f; -constexpr float SCALE_180 = 180.0f / PI; -constexpr float SCALE_360 = SCALE_180 * SCALE_FACTOR; -constexpr float PI_4 = 0.7853981633974483f; -constexpr float COEFF1 = 0.0663f; -constexpr float COEFF2 = 0.2447f; -} // namespace - -namespace -{ -inline float32x4_t inv(float32x4_t x) -{ - float32x4_t result = vrecpeq_f32(x); - result = vmulq_f32(vrecpsq_f32(x, result), result); - return result; -} - -inline float32x4_t atan2_0_360(float32x4_t gx, float32x4_t gy) -{ - const float32x4_t zero = vdupq_n_f32(0.0f); - const float32x4_t epsilon = vdupq_n_f32(1e-9f); - const float32x4_t piover4 = vdupq_n_f32(PI_4); - const float32x4_t coeff1 = vdupq_n_f32(COEFF1); - const float32x4_t coeff2 = vdupq_n_f32(COEFF2); - const float32x4_t ninety = vdupq_n_f32(90.0f * SCALE_FACTOR); - const float32x4_t oneeighty = vdupq_n_f32(180.0f * SCALE_FACTOR); - const float32x4_t threesixty = vdupq_n_f32(360.0f * SCALE_FACTOR); - const float32x4_t scale = vdupq_n_f32(SCALE_360); - - float32x4_t abs_gx = vabsq_f32(gx); - float32x4_t abs_gy = vabsq_f32(gy); - float32x4_t tmin = vminq_f32(abs_gx, abs_gy); - float32x4_t tmax = vmaxq_f32(abs_gx, abs_gy); - float32x4_t z = vmulq_f32(tmin, inv(vaddq_f32(tmax, epsilon))); - float32x4_t absz = vabsq_f32(z); - float32x4_t term = vmulq_f32(z, vsubq_f32(vdupq_n_f32(1.0f), absz)); - - /* Compute y = pi/4 * x - x*(abs(x)-1)*(0.2447+0.0663 * abs(x) */ - float32x4_t result = vaddq_f32(coeff2, vmulq_f32(absz, coeff1)); - result = vmulq_f32(result, term); - result = vmlaq_f32(result, piover4, z); - - /* Radians to degrees conversion with applied a scale factor in order to have the result [0, 255] */ - result = vmulq_f32(result, scale); - - /* If z > 1, result = 90 - result */ - result = vbslq_f32(vcgeq_f32(abs_gx, abs_gy), result, vsubq_f32(ninety, result)); - - /* Choose correct quadrant */ - result = vbslq_f32(vcltq_f32(gx, zero), vsubq_f32(oneeighty, result), result); - result = vbslq_f32(vcltq_f32(gy, zero), vsubq_f32(threesixty, result), result); - - return result; -} - -inline float32x4_t atan2_0_180(float32x4_t gx, float32x4_t gy) -{ - const float32x4_t zero = vdupq_n_f32(0.0f); - const float32x4_t epsilon = vdupq_n_f32(1e-9f); // epsilon used to avoiding division by 0 - const float32x4_t piover4 = vdupq_n_f32(PI_4); - const float32x4_t coeff1 = vdupq_n_f32(COEFF1); - const float32x4_t coeff2 = vdupq_n_f32(COEFF2); - const float32x4_t ninety = vdupq_n_f32(90.0f); - const float32x4_t oneeighty = vdupq_n_f32(180.0f); - const float32x4_t threesixty = vdupq_n_f32(360.0f); - const float32x4_t scale = vdupq_n_f32(SCALE_180); - - float32x4_t abs_gx = vabsq_f32(gx); - float32x4_t abs_gy = vabsq_f32(gy); - float32x4_t tmin = vminq_f32(abs_gx, abs_gy); - float32x4_t tmax = vmaxq_f32(abs_gx, abs_gy); - float32x4_t z = vmulq_f32(tmin, inv(vaddq_f32(tmax, epsilon))); - float32x4_t absz = vabsq_f32(z); - - /* Compute y = pi/4 * z - z*(abs(z)-1)*(0.2447+0.0663 * abs(z) */ - float32x4_t term = vmulq_f32(z, vsubq_f32(vdupq_n_f32(1.0f), absz)); - float32x4_t result = vaddq_f32(coeff2, vmulq_f32(absz, coeff1)); - result = vmulq_f32(result, term); - result = vmlaq_f32(result, piover4, z); - - /* Radians to degrees conversion */ - result = vmulq_f32(result, scale); - - /* If z > 1, result = 90 - result */ - result = vbslq_f32(vcgeq_f32(abs_gx, abs_gy), result, vsubq_f32(ninety, result)); - - /* Choose correct quadrant */ - result = vbslq_f32(vcltq_f32(gx, zero), vsubq_f32(oneeighty, result), result); - result = vbslq_f32(vcltq_f32(gy, zero), vsubq_f32(threesixty, result), result); - result = vbslq_f32(vcgtq_f32(result, oneeighty), vsubq_f32(result, oneeighty), result); - - return result; -} - -inline float32x4_t invsqrtv(float32x4_t x) -{ - float32x4_t sqrt_reciprocal = vrsqrteq_f32(x); - - sqrt_reciprocal = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x, sqrt_reciprocal), sqrt_reciprocal), - sqrt_reciprocal); - sqrt_reciprocal = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x, sqrt_reciprocal), sqrt_reciprocal), - sqrt_reciprocal); - - return sqrt_reciprocal; -} - -inline float32x4_t sqrtv(float32x4_t x) -{ - float32x4_t res = vdupq_n_f32(0.5f); - return vmlaq_f32(res, x, invsqrtv(x)); -} - -inline int16x8_t magnitude_l2(int16x8_t input1, int16x8_t input2) -{ - const int32x4x2_t square_x = - { - { - vmull_s16(vget_low_s16(input1), vget_low_s16(input1)), - vmull_s16(vget_high_s16(input1), vget_high_s16(input1)) - } - }; - - const int32x4x2_t square_y = - { - { - vmull_s16(vget_low_s16(input2), vget_low_s16(input2)), - vmull_s16(vget_high_s16(input2), vget_high_s16(input2)) - } - }; - - const uint32x4x2_t sum = - { - { - vaddq_u32(vreinterpretq_u32_s32(square_x.val[0]), vreinterpretq_u32_s32(square_y.val[0])), - vaddq_u32(vreinterpretq_u32_s32(square_x.val[1]), vreinterpretq_u32_s32(square_y.val[1])) - } - }; - - const float32x4x2_t res = - { - { - sqrtv(vcvtq_f32_u32(sum.val[0])), - sqrtv(vcvtq_f32_u32(sum.val[1])) - } - }; - - return vcombine_s16(vqmovn_s32(vcvtq_s32_f32(res.val[0])), - vqmovn_s32(vcvtq_s32_f32(res.val[1]))); -} - -inline int16x8_t magnitude_l1(int16x8_t input1, int16x8_t input2) -{ - /* Saturating add */ - return vqaddq_s16(vqabsq_s16(input1), vqabsq_s16(input2)); -} - -inline uint8x8_t phase_signed(int16x8_t input1, int16x8_t input2) -{ - const float32x4_t zeropointfive = vdupq_n_f32(0.5f); - - float32x4_t inputx_f32_high = vcvtq_f32_s32(vmovl_s16(vget_high_s16(input1))); - float32x4_t inputx_f32_low = vcvtq_f32_s32(vmovl_s16(vget_low_s16(input1))); - float32x4_t inputy_f32_high = vcvtq_f32_s32(vmovl_s16(vget_high_s16(input2))); - float32x4_t inputy_f32_low = vcvtq_f32_s32(vmovl_s16(vget_low_s16(input2))); - - /* Compute fast atan2 */ - float32x4_t angle_high = atan2_0_360(inputx_f32_high, inputy_f32_high); - float32x4_t angle_low = atan2_0_360(inputx_f32_low, inputy_f32_low); - - angle_high = vaddq_f32(angle_high, zeropointfive); - angle_low = vaddq_f32(angle_low, zeropointfive); - - return vmovn_u16(vcombine_u16(vqmovun_s32(vcvtq_s32_f32(angle_low)), - vqmovun_s32(vcvtq_s32_f32(angle_high)))); -} - -inline uint8x8_t phase_unsigned(int16x8_t input1, int16x8_t input2) -{ - const float32x4_t zeropointfive = vdupq_n_f32(0.5f); - - float32x4_t inputx_f32_high = vcvtq_f32_s32(vmovl_s16(vget_high_s16(input1))); - float32x4_t inputx_f32_low = vcvtq_f32_s32(vmovl_s16(vget_low_s16(input1))); - float32x4_t inputy_f32_high = vcvtq_f32_s32(vmovl_s16(vget_high_s16(input2))); - float32x4_t inputy_f32_low = vcvtq_f32_s32(vmovl_s16(vget_low_s16(input2))); - - /* Compute fast atan2 */ - float32x4_t angle_high = atan2_0_180(inputx_f32_high, inputy_f32_high); - float32x4_t angle_low = atan2_0_180(inputx_f32_low, inputy_f32_low); - - angle_high = vaddq_f32(angle_high, zeropointfive); - angle_low = vaddq_f32(angle_low, zeropointfive); - - return vmovn_u16(vcombine_u16(vqmovun_s32(vcvtq_s32_f32(angle_low)), - vqmovun_s32(vcvtq_s32_f32(angle_high)))); -} -} // namespace - -template -NEMagnitudePhaseKernel::NEMagnitudePhaseKernel() - : _func(nullptr), _gx(nullptr), _gy(nullptr), _magnitude(nullptr), _phase(nullptr) -{ -} - -template -void NEMagnitudePhaseKernel::configure(const ITensor *gx, const ITensor *gy, ITensor *magnitude, ITensor *phase) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(gx, 1, DataType::S16); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(gy, 1, DataType::S16); - ARM_COMPUTE_ERROR_ON((nullptr == magnitude) && (nullptr == phase)); - - const bool run_mag = magnitude != nullptr; - const bool run_phase = phase != nullptr; - - if(run_mag) - { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(magnitude, 1, DataType::S16); - } - - if(run_phase) - { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(phase, 1, DataType::U8); - } - - _gx = gx; - _gy = gy; - _magnitude = magnitude; - _phase = phase; - - if(run_mag && run_phase) - { - /* Run magnitude and phase */ - _func = &NEMagnitudePhaseKernel::magnitude_phase; - } - else - { - if(run_mag) - { - /* Run magnitude */ - _func = &NEMagnitudePhaseKernel::magnitude; - } - else if(run_phase) - { - /* Run phase */ - _func = &NEMagnitudePhaseKernel::phase; - } - else - { - ARM_COMPUTE_ERROR("At least one output must be NOT NULL"); - } - } - - constexpr unsigned int num_elems_processed_per_iteration = 16; - - // Configure kernel window - Window win = calculate_max_window(*gx->info(), Steps(num_elems_processed_per_iteration)); - AccessWindowHorizontal magnitude_access(magnitude == nullptr ? nullptr : magnitude->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal phase_access(phase == nullptr ? nullptr : phase->info(), 0, num_elems_processed_per_iteration); - - update_window_and_padding(win, - AccessWindowHorizontal(gx->info(), 0, num_elems_processed_per_iteration), - AccessWindowHorizontal(gy->info(), 0, num_elems_processed_per_iteration), - magnitude_access, - phase_access); - - ValidRegion valid_region = intersect_valid_regions(gx->info()->valid_region(), - gy->info()->valid_region()); - - magnitude_access.set_valid_region(win, valid_region); - phase_access.set_valid_region(win, valid_region); - - INEKernel::configure(win); -} - -template -void NEMagnitudePhaseKernel::magnitude(const Window &window) -{ - Iterator gx(_gx, window); - Iterator gy(_gy, window); - Iterator magnitude(_magnitude, window); - - execute_window_loop(window, [&](const Coordinates &) - { - const int16x8x2_t input1 = - { - { - vld1q_s16(reinterpret_cast(gx.ptr())), - vld1q_s16(reinterpret_cast(gx.ptr()) + 8) - } - }; - - const int16x8x2_t input2 = - { - { - vld1q_s16(reinterpret_cast(gy.ptr())), - vld1q_s16(reinterpret_cast(gy.ptr()) + 8) - } - }; - - /* Compute magnitude */ - int16x8x2_t mag{ {} }; - - if(MagnitudeType::L2NORM == mag_type) - { - mag.val[0] = magnitude_l2(input1.val[0], input2.val[0]); - mag.val[1] = magnitude_l2(input1.val[1], input2.val[1]); - } - else - { - mag.val[0] = magnitude_l1(input1.val[0], input2.val[0]); - mag.val[1] = magnitude_l1(input1.val[1], input2.val[1]); - } - - /* Store magnitude */ - vst1q_s16(reinterpret_cast(magnitude.ptr()), mag.val[0]); - vst1q_s16(reinterpret_cast(magnitude.ptr()) + 8, mag.val[1]); - }, - gx, gy, magnitude); -} - -template -void NEMagnitudePhaseKernel::phase(const Window &window) -{ - Iterator gx(_gx, window); - Iterator gy(_gy, window); - Iterator phase(_phase, window); - - execute_window_loop(window, [&](const Coordinates &) - { - const int16x8x2_t input1 = - { - { - vld1q_s16(reinterpret_cast(gx.ptr())), - vld1q_s16(reinterpret_cast(gx.ptr()) + 8) - } - }; - - const int16x8x2_t input2 = - { - { - vld1q_s16(reinterpret_cast(gy.ptr())), - vld1q_s16(reinterpret_cast(gy.ptr()) + 8) - } - }; - - /* Compute phase */ - uint8x8x2_t vphase{ {} }; - - if(PhaseType::SIGNED == phase_type) - { - vphase.val[0] = phase_signed(input1.val[0], input2.val[0]); - vphase.val[1] = phase_signed(input1.val[1], input2.val[1]); - } - else - { - vphase.val[0] = phase_unsigned(input1.val[0], input2.val[0]); - vphase.val[1] = phase_unsigned(input1.val[1], input2.val[1]); - } - - /* Store phase */ - vst1q_u8(phase.ptr(), vcombine_u8(vphase.val[0], vphase.val[1])); - }, - gx, gy, phase); -} - -template -void NEMagnitudePhaseKernel::magnitude_phase(const Window &window) -{ - Iterator gx(_gx, window); - Iterator gy(_gy, window); - Iterator magnitude(_magnitude, window); - Iterator phase(_phase, window); - - execute_window_loop(window, [&](const Coordinates &) - { - const int16x8x2_t input1 = - { - { - vld1q_s16(reinterpret_cast(gx.ptr())), - vld1q_s16(reinterpret_cast(gx.ptr()) + 8) - } - }; - - const int16x8x2_t input2 = - { - { - vld1q_s16(reinterpret_cast(gy.ptr())), - vld1q_s16(reinterpret_cast(gy.ptr()) + 8) - } - }; - - /* Compute magnitude */ - int16x8x2_t mag{ {} }; - - if(MagnitudeType::L2NORM == mag_type) - { - mag.val[0] = magnitude_l2(input1.val[0], input2.val[0]); - mag.val[1] = magnitude_l2(input1.val[1], input2.val[1]); - } - else - { - mag.val[0] = magnitude_l1(input1.val[0], input2.val[0]); - mag.val[1] = magnitude_l1(input1.val[1], input2.val[1]); - } - - /* Store magnitude */ - vst1q_s16(reinterpret_cast(magnitude.ptr()), mag.val[0]); - vst1q_s16(reinterpret_cast(magnitude.ptr()) + 8, mag.val[1]); - - /* Compute phase */ - uint8x8x2_t vphase{ {} }; - - if(PhaseType::SIGNED == phase_type) - { - vphase.val[0] = phase_signed(input1.val[0], input2.val[0]); - vphase.val[1] = phase_signed(input1.val[1], input2.val[1]); - } - else - { - vphase.val[0] = phase_unsigned(input1.val[0], input2.val[0]); - vphase.val[1] = phase_unsigned(input1.val[1], input2.val[1]); - } - - /* Store phase */ - vst1q_u8(phase.ptr(), vcombine_u8(vphase.val[0], vphase.val[1])); - }, - gx, gy, magnitude, phase); -} - -template -void NEMagnitudePhaseKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - ARM_COMPUTE_ERROR_ON(_func == nullptr); - - (this->*_func)(window); -} - -template class arm_compute::NEMagnitudePhaseKernel; -template class arm_compute::NEMagnitudePhaseKernel; -template class arm_compute::NEMagnitudePhaseKernel; -template class arm_compute::NEMagnitudePhaseKernel; diff --git a/src/core/NEON/kernels/NEMagnitudePhaseKernel.h b/src/core/NEON/kernels/NEMagnitudePhaseKernel.h deleted file mode 100644 index 3803d05ce9..0000000000 --- a/src/core/NEON/kernels/NEMagnitudePhaseKernel.h +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEMAGNITUDEPHASEKERNEL_H -#define ARM_COMPUTE_NEMAGNITUDEPHASEKERNEL_H - -#include "arm_compute/core/Types.h" -#include "src/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Template interface for the kernel to compute magnitude and phase */ -template -class NEMagnitudePhaseKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEMagnitudePhaseKernel"; - } - /** Default constructor */ - NEMagnitudePhaseKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMagnitudePhaseKernel(const NEMagnitudePhaseKernel &) = delete; - /** Default move constructor */ - NEMagnitudePhaseKernel(NEMagnitudePhaseKernel &&) = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMagnitudePhaseKernel &operator=(const NEMagnitudePhaseKernel &) = delete; - /** Default move assignment operator */ - NEMagnitudePhaseKernel &operator=(NEMagnitudePhaseKernel &&) = default; - /** Destructor */ - ~NEMagnitudePhaseKernel() = default; - - /** Initialise the kernel's input, output. - * - * @note At least one of out1 or out2 must be set - * - * @param[in] gx Gradient X tensor. Data type supported: S16. - * @param[in] gy Gradient Y tensor. Data type supported: S16. - * @param[out] magnitude (Optional) The output tensor - Magnitude. Data type supported: S16. - * @param[out] phase (Optional) The output tensor - Phase. Data type supported: U8. - */ - void configure(const ITensor *gx, const ITensor *gy, ITensor *magnitude, ITensor *phase); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Function to perform magnitude on the given window - * - * @param[in] window Region on which to execute the kernel - */ - void magnitude(const Window &window); - /** Function to perform phase on the given window - * - * @param[in] window Region on which to execute the kernel - */ - void phase(const Window &window); - /** Function to perform magnitude and phase on the given window - * - * @param[in] window Region on which to execute the kernel - */ - void magnitude_phase(const Window &window); - -private: - /** Common signature for all the specialised MagnitudePhase functions - * - * @param[in] window Region on which to execute the kernel. - */ - using MagnitudePhaseFunctionPtr = void (NEMagnitudePhaseKernel::*)(const Window &window); - /** MagnitudePhase function to use for the particular formats passed to configure() */ - MagnitudePhaseFunctionPtr _func; - const ITensor *_gx; /**< Input gradient X */ - const ITensor *_gy; /**< Input gradient Y */ - ITensor *_magnitude; /**< Output - Magnitude */ - ITensor *_phase; /**< Output - Phase */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEMAGNITUDEPHASEKERNEL_H */ diff --git a/src/core/NEON/kernels/NEMeanStdDevKernel.cpp b/src/core/NEON/kernels/NEMeanStdDevKernel.cpp deleted file mode 100644 index a6bb9f2ef7..0000000000 --- a/src/core/NEON/kernels/NEMeanStdDevKernel.cpp +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/NEON/kernels/NEMeanStdDevKernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/IAccessWindow.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" - -#include -#include -#include -#include - -using namespace arm_compute; - -namespace arm_compute -{ -class Coordinates; -} // namespace arm_compute - -namespace -{ -template -std::pair accumulate(const Window &window, Iterator &iterator) -{ - uint64x1_t sum = vdup_n_u64(0); - uint64x1_t sum_squared = vdup_n_u64(0); - - // Calculate sum - execute_window_loop(window, [&](const Coordinates &) - { - const uint8x16_t in_data = vld1q_u8(iterator.ptr()); - - // Sum of the low and high elements of data - const uint16x8_t tmp0 = vaddl_u8(vget_low_u8(in_data), vget_high_u8(in_data)); - const uint32x4_t tmp1 = vaddl_u16(vget_low_u16(tmp0), vget_high_u16(tmp0)); - const uint32x2_t tmp2 = vadd_u32(vget_low_u32(tmp1), vget_high_u32(tmp1)); - - // Update sum - sum = vpadal_u32(sum, tmp2); - - if(calc_sum_squared) - { - const uint16x8_t square_data_low = vmull_u8(vget_low_u8(in_data), vget_low_u8(in_data)); - const uint16x8_t square_data_high = vmull_u8(vget_high_u8(in_data), vget_high_u8(in_data)); - - // Sum of the low and high elements of data - const uint32x4_t tmp0_low = vaddl_u16(vget_low_u16(square_data_low), vget_high_u16(square_data_low)); - const uint32x4_t tmp0_high = vaddl_u16(vget_low_u16(square_data_high), vget_high_u16(square_data_high)); - const uint32x4_t tmp1 = vaddq_u32(tmp0_low, tmp0_high); - const uint32x2_t tmp2 = vadd_u32(vget_low_u32(tmp1), vget_high_u32(tmp1)); - - // Update sum - sum_squared = vpadal_u32(sum_squared, tmp2); - } - }, - iterator); - - return std::make_pair(sum, sum_squared); -} -} // namespace - -NEMeanStdDevKernel::NEMeanStdDevKernel() - : _input(nullptr), _mean(nullptr), _stddev(nullptr), _global_sum(nullptr), _global_sum_squared(nullptr), _mtx(), _border_size(0) -{ -} - -BorderSize NEMeanStdDevKernel::border_size() const -{ - return _border_size; -} - -void NEMeanStdDevKernel::configure(const IImage *input, float *mean, uint64_t *global_sum, float *stddev, uint64_t *global_sum_squared) -{ - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input); - ARM_COMPUTE_ERROR_ON(nullptr == mean); - ARM_COMPUTE_ERROR_ON(nullptr == global_sum); - ARM_COMPUTE_ERROR_ON(stddev && nullptr == global_sum_squared); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - - _input = input; - _mean = mean; - _stddev = stddev; - _global_sum = global_sum; - _global_sum_squared = global_sum_squared; - - constexpr unsigned int num_elems_processed_per_iteration = 16; - - _border_size = BorderSize(ceil_to_multiple(input->info()->dimension(0), num_elems_processed_per_iteration) - input->info()->dimension(0)); - - // Configure kernel window - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); - - update_window_and_padding(win, AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration)); - - INEKernel::configure(win); -} - -void NEMeanStdDevKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - Iterator input(_input, window); - - uint64x1_t local_sum = vdup_n_u64(0); - uint64x1_t local_sum_squared = vdup_n_u64(0); - - if(_stddev != nullptr) - { - std::tie(local_sum, local_sum_squared) = accumulate(window, input); - } - else - { - std::tie(local_sum, local_sum_squared) = accumulate(window, input); - } - - const float num_pixels = _input->info()->dimension(0) * _input->info()->dimension(1); - - // Merge sum and calculate mean and stddev - arm_compute::unique_lock lock(_mtx); - - *_global_sum += vget_lane_u64(local_sum, 0); - - const float mean = *_global_sum / num_pixels; - *_mean = mean; - - if(_stddev != nullptr) - { - const uint64_t tmp_sum_squared = vget_lane_u64(local_sum_squared, 0); - *_global_sum_squared += tmp_sum_squared; - *_stddev = std::sqrt((*_global_sum_squared / num_pixels) - (mean * mean)); - } - - lock.unlock(); -} diff --git a/src/core/NEON/kernels/NEMeanStdDevKernel.h b/src/core/NEON/kernels/NEMeanStdDevKernel.h deleted file mode 100644 index e694f3824d..0000000000 --- a/src/core/NEON/kernels/NEMeanStdDevKernel.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEMEANSTDDEVKERNEL_H -#define ARM_COMPUTE_NEMEANSTDDEVKERNEL_H - -#include "src/core/NEON/INEKernel.h" -#include "support/Mutex.h" - -#include - -namespace arm_compute -{ -class ITensor; -using IImage = ITensor; - -/** Interface for the kernel to calculate mean and standard deviation of input image pixels. */ -class NEMeanStdDevKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEMeanStdDevKernel"; - } - /** Default constructor */ - NEMeanStdDevKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMeanStdDevKernel(const NEMeanStdDevKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMeanStdDevKernel &operator=(const NEMeanStdDevKernel &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEMeanStdDevKernel(NEMeanStdDevKernel &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEMeanStdDevKernel &operator=(NEMeanStdDevKernel &&) = delete; - /** Default destructor */ - ~NEMeanStdDevKernel() = default; - - /** Initialise the kernel's input and outputs. - * - * @param[in] input Input image. Data type supported: U8. - * @param[out] mean Input average pixel value. - * @param[out] global_sum Keeps global sum of pixel values. - * @param[out] stddev (Optional) Output standard deviation of pixel values. - * @param[out] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values. - */ - void configure(const IImage *input, float *mean, uint64_t *global_sum, float *stddev = nullptr, uint64_t *global_sum_squared = nullptr); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - - BorderSize border_size() const override; - -private: - const IImage *_input; - float *_mean; - float *_stddev; - uint64_t *_global_sum; - uint64_t *_global_sum_squared; - arm_compute::Mutex _mtx; - BorderSize _border_size; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEMEANSTDDEVKERNEL_H */ diff --git a/src/core/NEON/kernels/NEMedian3x3Kernel.cpp b/src/core/NEON/kernels/NEMedian3x3Kernel.cpp deleted file mode 100644 index 0160edc650..0000000000 --- a/src/core/NEON/kernels/NEMedian3x3Kernel.cpp +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/NEON/kernels/NEMedian3x3Kernel.h" - -#include "arm_compute/core/Coordinates.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/IAccessWindow.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/Validate.h" -#include "src/core/NEON/INEKernel.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" - -#include -#include - -using namespace arm_compute; - -namespace -{ -inline void sort(uint8x8_t &a, uint8x8_t &b) -{ - const uint8x8_t min = vmin_u8(a, b); - const uint8x8_t max = vmax_u8(a, b); - a = min; - b = max; -} -} // namespace - -BorderSize NEMedian3x3Kernel::border_size() const -{ - return BorderSize(1); -} - -void NEMedian3x3Kernel::configure(const ITensor *input, ITensor *output, bool border_undefined) -{ - _input = input; - _output = output; - - // Configure kernel window - constexpr unsigned int num_elems_processed_per_iteration = 8; - constexpr unsigned int num_elems_read_per_iteration = 16; - constexpr unsigned int num_elems_written_per_iteration = 8; - constexpr unsigned int num_rows_read_per_iteration = 3; - - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size()); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration); - - update_window_and_padding(win, - AccessWindowRectangle(input->info(), -border_size().left, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration), - output_access); - - output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - - INEKernel::configure(win); -} - -void NEMedian3x3Kernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INESimpleKernel::window(), window); - - const unsigned char *input_bot_ptr = _input->ptr_to_element(Coordinates(-1, -1)); - const unsigned char *input_mid_ptr = _input->ptr_to_element(Coordinates(-1, 0)); - const unsigned char *input_top_ptr = _input->ptr_to_element(Coordinates(-1, +1)); - - Iterator input(_input, window); - Iterator output(_output, window); - - execute_window_loop(window, [&](const Coordinates &) - { - const uint8x16_t top_data = vld1q_u8(input_top_ptr + input.offset()); - const uint8x16_t mid_data = vld1q_u8(input_mid_ptr + input.offset()); - const uint8x16_t bot_data = vld1q_u8(input_bot_ptr + input.offset()); - - uint8x8_t p0 = vget_low_u8(top_data); - uint8x8_t p1 = vext_u8(vget_low_u8(top_data), vget_high_u8(top_data), 1); - uint8x8_t p2 = vext_u8(vget_low_u8(top_data), vget_high_u8(top_data), 2); - uint8x8_t p3 = vget_low_u8(mid_data); - uint8x8_t p4 = vext_u8(vget_low_u8(mid_data), vget_high_u8(mid_data), 1); - uint8x8_t p5 = vext_u8(vget_low_u8(mid_data), vget_high_u8(mid_data), 2); - uint8x8_t p6 = vget_low_u8(bot_data); - uint8x8_t p7 = vext_u8(vget_low_u8(bot_data), vget_high_u8(bot_data), 1); - uint8x8_t p8 = vext_u8(vget_low_u8(bot_data), vget_high_u8(bot_data), 2); - - sort(p1, p2); - sort(p4, p5); - sort(p7, p8); - - sort(p0, p1); - sort(p3, p4); - sort(p6, p7); - - sort(p1, p2); - sort(p4, p5); - sort(p7, p8); - - sort(p0, p3); - sort(p5, p8); - sort(p4, p7); - - sort(p3, p6); - sort(p1, p4); - sort(p2, p5); - - sort(p4, p7); - sort(p4, p2); - sort(p6, p4); - - sort(p4, p2); - - vst1_u8(output.ptr(), p4); - }, - input, output); -} diff --git a/src/core/NEON/kernels/NEMedian3x3Kernel.h b/src/core/NEON/kernels/NEMedian3x3Kernel.h deleted file mode 100644 index b9e28b3053..0000000000 --- a/src/core/NEON/kernels/NEMedian3x3Kernel.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEMEDIAN3x3KERNEL_H -#define ARM_COMPUTE_NEMEDIAN3x3KERNEL_H - -#include "src/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Kernel to perform a median filter on a tensor */ -class NEMedian3x3Kernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEMedian3x3Kernel"; - } - /** Default constructor */ - NEMedian3x3Kernel() = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMedian3x3Kernel(const NEMedian3x3Kernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMedian3x3Kernel &operator=(const NEMedian3x3Kernel &) = delete; - /** Allow instances of this class to be moved */ - NEMedian3x3Kernel(NEMedian3x3Kernel &&) = default; - /** Allow instances of this class to be moved */ - NEMedian3x3Kernel &operator=(NEMedian3x3Kernel &&) = default; - /** Default destructor */ - ~NEMedian3x3Kernel() = default; - /** Set the source, destination and border mode of the kernel - * - * @param[in] input Source tensor. Data type supported: U8 - * @param[out] output Destination tensor. Data type supported: U8 - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input, ITensor *output, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEMEDIAN3x3KERNEL_H */ diff --git a/src/core/NEON/kernels/NEMinMaxLocationKernel.cpp b/src/core/NEON/kernels/NEMinMaxLocationKernel.cpp deleted file mode 100644 index 402e6f1811..0000000000 --- a/src/core/NEON/kernels/NEMinMaxLocationKernel.cpp +++ /dev/null @@ -1,478 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/NEON/kernels/NEMinMaxLocationKernel.h" - -#include "arm_compute/core/Coordinates.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/IAccessWindow.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" -#include "arm_compute/core/utils/misc/Utility.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" - -#include -#include -#include -#include - -namespace arm_compute -{ -NEMinMaxKernel::NEMinMaxKernel() - : _func(), _input(nullptr), _min(), _max(), _mtx() -{ -} - -void NEMinMaxKernel::configure(const IImage *input, void *min, void *max) -{ - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16, DataType::F32); - ARM_COMPUTE_ERROR_ON(nullptr == min); - ARM_COMPUTE_ERROR_ON(nullptr == max); - - _input = input; - _min = min; - _max = max; - - switch(_input->info()->data_type()) - { - case DataType::U8: - _func = &NEMinMaxKernel::minmax_U8; - break; - case DataType::S16: - _func = &NEMinMaxKernel::minmax_S16; - break; - case DataType::F32: - _func = &NEMinMaxKernel::minmax_F32; - break; - default: - ARM_COMPUTE_ERROR("Unsupported data type"); - break; - } - - // Configure kernel window - constexpr unsigned int num_elems_processed_per_iteration = 1; - - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); - - INEKernel::configure(win); -} - -void NEMinMaxKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - ARM_COMPUTE_ERROR_ON(_func == nullptr); - - (this->*_func)(window); -} - -void NEMinMaxKernel::reset() -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - switch(_input->info()->data_type()) - { - case DataType::U8: - *static_cast(_min) = UCHAR_MAX; - *static_cast(_max) = 0; - break; - case DataType::S16: - *static_cast(_min) = SHRT_MAX; - *static_cast(_max) = SHRT_MIN; - break; - case DataType::F32: - *static_cast(_min) = std::numeric_limits::max(); - *static_cast(_max) = std::numeric_limits::lowest(); - break; - default: - ARM_COMPUTE_ERROR("Unsupported data type"); - break; - } -} - -template -void NEMinMaxKernel::update_min_max(const T min, const T max) -{ - arm_compute::lock_guard lock(_mtx); - - using type = typename std::conditional::value, float, int32_t>::type; - - auto min_ptr = static_cast(_min); - auto max_ptr = static_cast(_max); - - if(min < *min_ptr) - { - *min_ptr = min; - } - - if(max > *max_ptr) - { - *max_ptr = max; - } -} - -void NEMinMaxKernel::minmax_U8(Window win) -{ - uint8x8_t carry_min = vdup_n_u8(UCHAR_MAX); - uint8x8_t carry_max = vdup_n_u8(0); - - uint8_t carry_max_scalar = 0; - uint8_t carry_min_scalar = UCHAR_MAX; - - const int x_start = win.x().start(); - const int x_end = win.x().end(); - - // Handle X dimension manually to split into two loops - // First one will use vector operations, second one processes the left over pixels - win.set(Window::DimX, Window::Dimension(0, 1, 1)); - - Iterator input(_input, win); - - execute_window_loop(win, [&](const Coordinates &) - { - int x = x_start; - - // Vector loop - for(; x <= x_end - 16; x += 16) - { - const uint8x16_t pixels = vld1q_u8(input.ptr() + x); - const uint8x8_t tmp_min = vmin_u8(vget_high_u8(pixels), vget_low_u8(pixels)); - const uint8x8_t tmp_max = vmax_u8(vget_high_u8(pixels), vget_low_u8(pixels)); - carry_min = vmin_u8(tmp_min, carry_min); - carry_max = vmax_u8(tmp_max, carry_max); - } - - // Process leftover pixels - for(; x < x_end; ++x) - { - const uint8_t pixel = input.ptr()[x]; - carry_min_scalar = std::min(pixel, carry_min_scalar); - carry_max_scalar = std::max(pixel, carry_max_scalar); - } - }, - input); - - // Reduce result - carry_min = vpmin_u8(carry_min, carry_min); - carry_max = vpmax_u8(carry_max, carry_max); - carry_min = vpmin_u8(carry_min, carry_min); - carry_max = vpmax_u8(carry_max, carry_max); - carry_min = vpmin_u8(carry_min, carry_min); - carry_max = vpmax_u8(carry_max, carry_max); - - // Extract max/min values - const uint8_t min_i = std::min(vget_lane_u8(carry_min, 0), carry_min_scalar); - const uint8_t max_i = std::max(vget_lane_u8(carry_max, 0), carry_max_scalar); - - // Perform reduction of local min/max values - update_min_max(min_i, max_i); -} - -void NEMinMaxKernel::minmax_S16(Window win) -{ - int16x4_t carry_min = vdup_n_s16(SHRT_MAX); - int16x4_t carry_max = vdup_n_s16(SHRT_MIN); - - int16_t carry_max_scalar = SHRT_MIN; - int16_t carry_min_scalar = SHRT_MAX; - - const int x_start = win.x().start(); - const int x_end = win.x().end(); - - // Handle X dimension manually to split into two loops - // First one will use vector operations, second one processes the left over pixels - win.set(Window::DimX, Window::Dimension(0, 1, 1)); - - Iterator input(_input, win); - - execute_window_loop(win, [&](const Coordinates &) - { - int x = x_start; - const auto in_ptr = reinterpret_cast(input.ptr()); - - // Vector loop - for(; x <= x_end - 16; x += 16) - { - const int16x8x2_t pixels = vld2q_s16(in_ptr + x); - const int16x8_t tmp_min1 = vminq_s16(pixels.val[0], pixels.val[1]); - const int16x8_t tmp_max1 = vmaxq_s16(pixels.val[0], pixels.val[1]); - const int16x4_t tmp_min2 = vmin_s16(vget_high_s16(tmp_min1), vget_low_s16(tmp_min1)); - const int16x4_t tmp_max2 = vmax_s16(vget_high_s16(tmp_max1), vget_low_s16(tmp_max1)); - carry_min = vmin_s16(tmp_min2, carry_min); - carry_max = vmax_s16(tmp_max2, carry_max); - } - - // Process leftover pixels - for(; x < x_end; ++x) - { - const int16_t pixel = in_ptr[x]; - carry_min_scalar = std::min(pixel, carry_min_scalar); - carry_max_scalar = std::max(pixel, carry_max_scalar); - } - - }, - input); - - // Reduce result - carry_min = vpmin_s16(carry_min, carry_min); - carry_max = vpmax_s16(carry_max, carry_max); - carry_min = vpmin_s16(carry_min, carry_min); - carry_max = vpmax_s16(carry_max, carry_max); - - // Extract max/min values - const int16_t min_i = std::min(vget_lane_s16(carry_min, 0), carry_min_scalar); - const int16_t max_i = std::max(vget_lane_s16(carry_max, 0), carry_max_scalar); - - // Perform reduction of local min/max values - update_min_max(min_i, max_i); -} - -void NEMinMaxKernel::minmax_F32(Window win) -{ - float32x2_t carry_min = vdup_n_f32(std::numeric_limits::max()); - float32x2_t carry_max = vdup_n_f32(std::numeric_limits::lowest()); - - float carry_min_scalar = std::numeric_limits::max(); - float carry_max_scalar = std::numeric_limits::lowest(); - - const int x_start = win.x().start(); - const int x_end = win.x().end(); - - // Handle X dimension manually to split into two loops - // First one will use vector operations, second one processes the left over pixels - win.set(Window::DimX, Window::Dimension(0, 1, 1)); - - Iterator input(_input, win); - - execute_window_loop(win, [&](const Coordinates &) - { - int x = x_start; - const auto in_ptr = reinterpret_cast(input.ptr()); - - // Vector loop - for(; x <= x_end - 8; x += 8) - { - const float32x4x2_t pixels = vld2q_f32(in_ptr + x); - const float32x4_t tmp_min1 = vminq_f32(pixels.val[0], pixels.val[1]); - const float32x4_t tmp_max1 = vmaxq_f32(pixels.val[0], pixels.val[1]); - const float32x2_t tmp_min2 = vmin_f32(vget_high_f32(tmp_min1), vget_low_f32(tmp_min1)); - const float32x2_t tmp_max2 = vmax_f32(vget_high_f32(tmp_max1), vget_low_f32(tmp_max1)); - carry_min = vmin_f32(tmp_min2, carry_min); - carry_max = vmax_f32(tmp_max2, carry_max); - } - - // Process leftover pixels - for(; x < x_end; ++x) - { - const float pixel = in_ptr[x]; - carry_min_scalar = std::min(pixel, carry_min_scalar); - carry_max_scalar = std::max(pixel, carry_max_scalar); - } - - }, - input); - - // Reduce result - carry_min = vpmin_f32(carry_min, carry_min); - carry_max = vpmax_f32(carry_max, carry_max); - carry_min = vpmin_f32(carry_min, carry_min); - carry_max = vpmax_f32(carry_max, carry_max); - - // Extract max/min values - const float min_i = std::min(vget_lane_f32(carry_min, 0), carry_min_scalar); - const float max_i = std::max(vget_lane_f32(carry_max, 0), carry_max_scalar); - - // Perform reduction of local min/max values - update_min_max(min_i, max_i); -} - -NEMinMaxLocationKernel::NEMinMaxLocationKernel() - : _func(nullptr), _input(nullptr), _min(nullptr), _max(nullptr), _min_count(nullptr), _max_count(nullptr), _min_loc(nullptr), _max_loc(nullptr) -{ -} - -bool NEMinMaxLocationKernel::is_parallelisable() const -{ - return false; -} - -template -struct NEMinMaxLocationKernel::create_func_table> -{ - static const std::array func_table; -}; - -template -const std::array NEMinMaxLocationKernel::create_func_table>::func_table -{ - &NEMinMaxLocationKernel::minmax_loc... -}; - -void NEMinMaxLocationKernel::configure(const IImage *input, void *min, void *max, - ICoordinates2DArray *min_loc, ICoordinates2DArray *max_loc, - uint32_t *min_count, uint32_t *max_count) -{ - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16, DataType::F32); - ARM_COMPUTE_ERROR_ON(nullptr == min); - ARM_COMPUTE_ERROR_ON(nullptr == max); - - _input = input; - _min = min; - _max = max; - _min_count = min_count; - _max_count = max_count; - _min_loc = min_loc; - _max_loc = max_loc; - - unsigned int count_min = (nullptr != min_count ? 1 : 0); - unsigned int count_max = (nullptr != max_count ? 1 : 0); - unsigned int loc_min = (nullptr != min_loc ? 1 : 0); - unsigned int loc_max = (nullptr != max_loc ? 1 : 0); - - unsigned int table_idx = (count_min << 3) | (count_max << 2) | (loc_min << 1) | loc_max; - - switch(input->info()->data_type()) - { - case DataType::U8: - _func = create_func_table>::func_table[table_idx]; - break; - case DataType::S16: - _func = create_func_table>::func_table[table_idx]; - break; - case DataType::F32: - _func = create_func_table>::func_table[table_idx]; - break; - default: - ARM_COMPUTE_ERROR("Unsupported data type"); - break; - } - - constexpr unsigned int num_elems_processed_per_iteration = 1; - - // Configure kernel window - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); - - update_window_and_padding(win, AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration)); - - INEKernel::configure(win); -} - -void NEMinMaxLocationKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - ARM_COMPUTE_ERROR_ON(_func == nullptr); - - (this->*_func)(window); -} - -template -void NEMinMaxLocationKernel::minmax_loc(const Window &win) -{ - if(count_min || count_max || loc_min || loc_max) - { - Iterator input(_input, win); - - size_t min_count = 0; - size_t max_count = 0; - - // Clear min location array - if(loc_min) - { - _min_loc->clear(); - } - - // Clear max location array - if(loc_max) - { - _max_loc->clear(); - } - - using type = typename std::conditional::value, float, int32_t>::type; - - auto min_ptr = static_cast(_min); - auto max_ptr = static_cast(_max); - - execute_window_loop(win, [&](const Coordinates & id) - { - auto in_ptr = reinterpret_cast(input.ptr()); - int32_t idx = id.x(); - int32_t idy = id.y(); - - const T pixel = *in_ptr; - Coordinates2D p{ idx, idy }; - - if(count_min || loc_min) - { - if(*min_ptr == pixel) - { - if(count_min) - { - ++min_count; - } - - if(loc_min) - { - _min_loc->push_back(p); - } - } - } - - if(count_max || loc_max) - { - if(*max_ptr == pixel) - { - if(count_max) - { - ++max_count; - } - - if(loc_max) - { - _max_loc->push_back(p); - } - } - } - }, - input); - - if(count_min) - { - *_min_count = min_count; - } - - if(count_max) - { - *_max_count = max_count; - } - } -} -} // namespace arm_compute diff --git a/src/core/NEON/kernels/NEMinMaxLocationKernel.h b/src/core/NEON/kernels/NEMinMaxLocationKernel.h deleted file mode 100644 index a24666096f..0000000000 --- a/src/core/NEON/kernels/NEMinMaxLocationKernel.h +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEMINMAXLOCATIONKERNEL_H -#define ARM_COMPUTE_NEMINMAXLOCATIONKERNEL_H - -#include "arm_compute/core/IArray.h" -#include "src/core/NEON/INEKernel.h" -#include "support/Mutex.h" - -#include - -namespace arm_compute -{ -class ITensor; -using IImage = ITensor; - -/** Interface for the kernel to perform min max search on an image. */ -class NEMinMaxKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEMinMaxKernel"; - } - /** Default constructor */ - NEMinMaxKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMinMaxKernel(const NEMinMaxKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMinMaxKernel &operator=(const NEMinMaxKernel &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEMinMaxKernel(NEMinMaxKernel &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEMinMaxKernel &operator=(NEMinMaxKernel &&) = delete; - /** Default destructor */ - ~NEMinMaxKernel() = default; - - /** Initialise the kernel's input and outputs. - * - * @param[in] input Input Image. Data types supported: U8/S16/F32. - * @param[out] min Minimum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32. - * @param[out] max Maximum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32. - */ - void configure(const IImage *input, void *min, void *max); - /** Resets global minimum and maximum. */ - void reset(); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Performs the min/max algorithm on U8 images on a given window. - * - * @param win The window to run the algorithm on. - */ - void minmax_U8(Window win); - /** Performs the min/max algorithm on S16 images on a given window. - * - * @param win The window to run the algorithm on. - */ - void minmax_S16(Window win); - /** Performs the min/max algorithm on F32 images on a given window. - * - * @param win The window to run the algorithm on. - */ - void minmax_F32(Window win); - /** Common signature for all the specialised MinMax functions - * - * @param[in] window Region on which to execute the kernel. - */ - using MinMaxFunction = void (NEMinMaxKernel::*)(Window window); - /** MinMax function to use for the particular image types passed to configure() */ - MinMaxFunction _func; - /** Helper to update min/max values **/ - template - void update_min_max(T min, T max); - - const IImage *_input; /**< Input image. */ - void *_min; /**< Minimum value. */ - void *_max; /**< Maximum value. */ - arm_compute::Mutex _mtx; /**< Mutex used for result reduction. */ -}; - -/** Interface for the kernel to find min max locations of an image. */ -class NEMinMaxLocationKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEMinMaxLocationKernel"; - } - /** Default constructor */ - NEMinMaxLocationKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMinMaxLocationKernel(const NEMinMaxLocationKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMinMaxLocationKernel &operator=(const NEMinMaxLocationKernel &) = delete; - /** Allow instances of this class to be moved */ - NEMinMaxLocationKernel(NEMinMaxLocationKernel &&) = default; - /** Allow instances of this class to be moved */ - NEMinMaxLocationKernel &operator=(NEMinMaxLocationKernel &&) = default; - /** Default destructor */ - ~NEMinMaxLocationKernel() = default; - - /** Initialise the kernel's input and outputs. - * - * @param[in] input Input Image. Data types supported: U8/S16/F32. - * @param[out] min Minimum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32. - * @param[out] max Maximum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32. - * @param[out] min_loc Array of minimum value locations. - * @param[out] max_loc Array of maximum value locations. - * @param[out] min_count Number of minimum value encounters. - * @param[out] max_count Number of maximum value encounters. - */ - void configure(const IImage *input, void *min, void *max, - ICoordinates2DArray *min_loc = nullptr, ICoordinates2DArray *max_loc = nullptr, - uint32_t *min_count = nullptr, uint32_t *max_count = nullptr); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - bool is_parallelisable() const override; - -private: - /** Performs the min/max location algorithm on T type images on a given window. - * - * @param win The window to run the algorithm on. - */ - template - void minmax_loc(const Window &win); - /** Common signature for all the specialised MinMaxLoc functions - * - * @param[in] window Region on which to execute the kernel. - */ - using MinMaxLocFunction = void (NEMinMaxLocationKernel::*)(const Window &window); - /** MinMaxLoc function to use for the particular image types passed to configure() */ - MinMaxLocFunction _func; - /** Helper to create a function pointer table for the parameterized MinMaxLocation functions. */ - template - struct create_func_table; - - const IImage *_input; /**< Input image. */ - void *_min; /**< Minimum value. */ - void *_max; /**< Maximum value. */ - uint32_t *_min_count; /**< Count of minimum value encounters. */ - uint32_t *_max_count; /**< Count of maximum value encounters. */ - ICoordinates2DArray *_min_loc; /**< Locations of minimum values. */ - ICoordinates2DArray *_max_loc; /**< Locations of maximum values. */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEMINMAXLOCATIONKERNEL_H */ diff --git a/src/core/NEON/kernels/NENonLinearFilterKernel.cpp b/src/core/NEON/kernels/NENonLinearFilterKernel.cpp deleted file mode 100644 index 58c0acd404..0000000000 --- a/src/core/NEON/kernels/NENonLinearFilterKernel.cpp +++ /dev/null @@ -1,1018 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/NEON/kernels/NENonLinearFilterKernel.h" - -#include "arm_compute/core/Coordinates.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Validate.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" - -#include -#include -#include -#include -#include - -namespace arm_compute -{ -namespace -{ -const uint8x16_t zero_u8 = vdupq_n_u8(0); - -template -inline uint8x8_t min_row(uint8x16_t row_data) -{ - uint8x8_t min = vget_low_u8(row_data); - - for(size_t c = 1; c < columns; ++c) - { - row_data = vextq_u8(row_data, zero_u8, 1); - min = vmin_u8(min, vget_low_u8(row_data)); - } - - return min; -} - -template -inline uint8x8_t max_row(uint8x16_t row_data) -{ - uint8x8_t max = vget_low_u8(row_data); - - for(size_t c = 1; c < columns; ++c) - { - row_data = vextq_u8(row_data, zero_u8, 1); - max = vmax_u8(max, vget_low_u8(row_data)); - } - - return max; -} - -inline void sort(uint8x8_t &a, uint8x8_t &b) -{ - const uint8x8_t min = vmin_u8(a, b); - const uint8x8_t max = vmax_u8(a, b); - a = min; - b = max; -} - -// Sorting networks below were generated using http://pages.ripco.net/~jgamble/nw.html -// Calculations that do not affect the median were removed. -inline void sort5(uint8x8_t &p0, uint8x8_t &p1, uint8x8_t &p2, uint8x8_t &p3, uint8x8_t &p4) -{ - sort(p0, p1); - sort(p2, p3); - sort(p0, p2); - sort(p1, p3); - sort(p1, p2); - sort(p0, p4); - sort(p1, p4); - sort(p2, p4); -} - -inline void sort9(uint8x8_t &p0, uint8x8_t &p1, uint8x8_t &p2, - uint8x8_t &p3, uint8x8_t &p4, uint8x8_t &p5, - uint8x8_t &p6, uint8x8_t &p7, uint8x8_t &p8) -{ - sort(p1, p2); - sort(p4, p5); - sort(p7, p8); - sort(p0, p1); - sort(p3, p4); - sort(p6, p7); - sort(p1, p2); - sort(p4, p5); - sort(p7, p8); - sort(p0, p3); - sort(p5, p8); - sort(p4, p7); - sort(p3, p6); - sort(p1, p4); - sort(p2, p5); - sort(p4, p7); - sort(p4, p2); - sort(p6, p4); - sort(p4, p2); -} - -inline void sort21(std::array &p) -{ - sort(p[0], p[1]); - sort(p[2], p[3]); - sort(p[4], p[5]); - sort(p[6], p[7]); - sort(p[8], p[9]); - sort(p[10], p[11]); - sort(p[12], p[13]); - sort(p[14], p[15]); - sort(p[16], p[17]); - sort(p[18], p[19]); - sort(p[0], p[2]); - sort(p[1], p[3]); - sort(p[4], p[6]); - sort(p[5], p[7]); - sort(p[8], p[10]); - sort(p[9], p[11]); - sort(p[12], p[14]); - sort(p[13], p[15]); - sort(p[16], p[18]); - sort(p[17], p[19]); - sort(p[1], p[2]); - sort(p[5], p[6]); - sort(p[0], p[4]); - sort(p[3], p[7]); - sort(p[9], p[10]); - sort(p[13], p[14]); - sort(p[8], p[12]); - sort(p[11], p[15]); - sort(p[17], p[18]); - sort(p[16], p[20]); - sort(p[1], p[5]); - sort(p[2], p[6]); - sort(p[9], p[13]); - sort(p[10], p[14]); - sort(p[0], p[8]); - sort(p[7], p[15]); - sort(p[17], p[20]); - sort(p[1], p[4]); - sort(p[3], p[6]); - sort(p[9], p[12]); - sort(p[11], p[14]); - sort(p[18], p[20]); - sort(p[0], p[16]); - sort(p[2], p[4]); - sort(p[3], p[5]); - sort(p[10], p[12]); - sort(p[11], p[13]); - sort(p[1], p[9]); - sort(p[6], p[14]); - sort(p[19], p[20]); - sort(p[3], p[4]); - sort(p[11], p[12]); - sort(p[1], p[8]); - sort(p[2], p[10]); - sort(p[5], p[13]); - sort(p[7], p[14]); - sort(p[3], p[11]); - sort(p[2], p[8]); - sort(p[4], p[12]); - sort(p[7], p[13]); - sort(p[1], p[17]); - sort(p[3], p[10]); - sort(p[5], p[12]); - sort(p[1], p[16]); - sort(p[2], p[18]); - sort(p[3], p[9]); - sort(p[6], p[12]); - sort(p[2], p[16]); - sort(p[3], p[8]); - sort(p[7], p[12]); - sort(p[5], p[9]); - sort(p[6], p[10]); - sort(p[4], p[8]); - sort(p[7], p[11]); - sort(p[3], p[19]); - sort(p[5], p[8]); - sort(p[7], p[10]); - sort(p[3], p[18]); - sort(p[4], p[20]); - sort(p[6], p[8]); - sort(p[7], p[9]); - sort(p[3], p[17]); - sort(p[5], p[20]); - sort(p[7], p[8]); - sort(p[3], p[16]); - sort(p[6], p[20]); - sort(p[5], p[17]); - sort(p[7], p[20]); - sort(p[4], p[16]); - sort(p[6], p[18]); - sort(p[5], p[16]); - sort(p[7], p[19]); - sort(p[7], p[18]); - sort(p[6], p[16]); - sort(p[7], p[17]); - sort(p[10], p[18]); - sort(p[7], p[16]); - sort(p[9], p[17]); - sort(p[8], p[16]); - sort(p[9], p[16]); - sort(p[10], p[16]); -} - -inline void sort25(std::array &p) -{ - sort(p[1], p[2]); - sort(p[0], p[1]); - sort(p[1], p[2]); - sort(p[4], p[5]); - sort(p[3], p[4]); - sort(p[4], p[5]); - sort(p[0], p[3]); - sort(p[2], p[5]); - sort(p[2], p[3]); - sort(p[1], p[4]); - sort(p[1], p[2]); - sort(p[3], p[4]); - sort(p[7], p[8]); - sort(p[6], p[7]); - sort(p[7], p[8]); - sort(p[10], p[11]); - sort(p[9], p[10]); - sort(p[10], p[11]); - sort(p[6], p[9]); - sort(p[8], p[11]); - sort(p[8], p[9]); - sort(p[7], p[10]); - sort(p[7], p[8]); - sort(p[9], p[10]); - sort(p[0], p[6]); - sort(p[4], p[10]); - sort(p[4], p[6]); - sort(p[2], p[8]); - sort(p[2], p[4]); - sort(p[6], p[8]); - sort(p[1], p[7]); - sort(p[5], p[11]); - sort(p[5], p[7]); - sort(p[3], p[9]); - sort(p[3], p[5]); - sort(p[7], p[9]); - sort(p[1], p[2]); - sort(p[3], p[4]); - sort(p[5], p[6]); - sort(p[7], p[8]); - sort(p[9], p[10]); - sort(p[13], p[14]); - sort(p[12], p[13]); - sort(p[13], p[14]); - sort(p[16], p[17]); - sort(p[15], p[16]); - sort(p[16], p[17]); - sort(p[12], p[15]); - sort(p[14], p[17]); - sort(p[14], p[15]); - sort(p[13], p[16]); - sort(p[13], p[14]); - sort(p[15], p[16]); - sort(p[19], p[20]); - sort(p[18], p[19]); - sort(p[19], p[20]); - sort(p[21], p[22]); - sort(p[23], p[24]); - sort(p[21], p[23]); - sort(p[22], p[24]); - sort(p[22], p[23]); - sort(p[18], p[21]); - sort(p[20], p[23]); - sort(p[20], p[21]); - sort(p[19], p[22]); - sort(p[22], p[24]); - sort(p[19], p[20]); - sort(p[21], p[22]); - sort(p[23], p[24]); - sort(p[12], p[18]); - sort(p[16], p[22]); - sort(p[16], p[18]); - sort(p[14], p[20]); - sort(p[20], p[24]); - sort(p[14], p[16]); - sort(p[18], p[20]); - sort(p[22], p[24]); - sort(p[13], p[19]); - sort(p[17], p[23]); - sort(p[17], p[19]); - sort(p[15], p[21]); - sort(p[15], p[17]); - sort(p[19], p[21]); - sort(p[13], p[14]); - sort(p[15], p[16]); - sort(p[17], p[18]); - sort(p[19], p[20]); - sort(p[21], p[22]); - sort(p[23], p[24]); - sort(p[0], p[12]); - sort(p[8], p[20]); - sort(p[8], p[12]); - sort(p[4], p[16]); - sort(p[16], p[24]); - sort(p[12], p[16]); - sort(p[2], p[14]); - sort(p[10], p[22]); - sort(p[10], p[14]); - sort(p[6], p[18]); - sort(p[6], p[10]); - sort(p[10], p[12]); - sort(p[1], p[13]); - sort(p[9], p[21]); - sort(p[9], p[13]); - sort(p[5], p[17]); - sort(p[13], p[17]); - sort(p[3], p[15]); - sort(p[11], p[23]); - sort(p[11], p[15]); - sort(p[7], p[19]); - sort(p[7], p[11]); - sort(p[11], p[13]); - sort(p[11], p[12]); -} -} // namespace - -NENonLinearFilterKernel::NENonLinearFilterKernel() - : _border_width(0), _input(nullptr), _output(nullptr), _mask(nullptr), _pattern(MatrixPattern::BOX), _function(NonLinearFilterFunction::MIN), _func_idx(0), _border_size() -{ -} - -BorderSize NENonLinearFilterKernel::border_size() const -{ - return _border_size; -} - -void NENonLinearFilterKernel::configure(const ITensor *input, ITensor *output, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, - bool border_undefined) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON(3 != mask_size && 5 != mask_size); - ARM_COMPUTE_ERROR_ON(MatrixPattern::OTHER == pattern && nullptr == mask); - - // Set class variables - _border_size = BorderSize(mask_size / 2); - _input = input; - _output = output; - _mask = mask; - _pattern = pattern; - _function = function; - - // Configure kernel window - const unsigned int num_elems_processed_per_iteration = (MatrixPattern::OTHER == pattern) ? 1 : 8; - constexpr unsigned int num_elems_read_per_iteration = 16; - - Window win = calculate_max_window(*input->info(), num_elems_processed_per_iteration, border_undefined, border_size()); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - update_window_and_padding(win, - AccessWindowRectangle(input->info(), -border_size().left, -border_size().top, num_elems_read_per_iteration, mask_size), - output_access); - output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - - INEKernel::configure(win); - - // Define function index - _func_idx = (3 == mask_size) ? 0 : 1; - - if(MatrixPattern::OTHER != pattern) - { - _func_idx = (_func_idx) * 3 + static_cast(function); - } -} - -void NENonLinearFilterKernel::fill_mask(uint8_t *mask, int cols, int rows, MatrixPattern pattern) -{ - unsigned int v = 0; - - for(int r = 0; r < rows; ++r) - { - for(int c = 0; c < cols; ++c, ++v) - { - uint8_t val = 0; - - switch(pattern) - { - case MatrixPattern::BOX: - val = 255; - break; - case MatrixPattern::CROSS: - val = ((r == (rows / 2)) || (c == (cols / 2))) ? 255 : 0; - break; - case MatrixPattern::DISK: - val = (((r - rows / 2.0f + 0.5f) * (r - rows / 2.0f + 0.5f)) / ((rows / 2.0f) * (rows / 2.0f)) + ((c - cols / 2.0f + 0.5f) * (c - cols / 2.0f + 0.5f)) / ((cols / 2.0f) * - (cols / 2.0f))) <= 1.0f ? 255 : 0; - break; - default: - return; - } - - mask[v] = val; - } - } -} - -template <> -void NENonLinearFilterKernel::median_filter_box<3, 3>(const Window &win) -{ - Iterator input(_input, win); - Iterator output(_output, win); - - const auto input_top_ptr = static_cast(_input->ptr_to_element(Coordinates(-1, -1))); - const auto input_mid_ptr = static_cast(_input->ptr_to_element(Coordinates(-1, 0))); - const auto input_bot_ptr = static_cast(_input->ptr_to_element(Coordinates(-1, 1))); - - execute_window_loop(win, [&](const Coordinates &) - { - const uint8x16_t top_data = vld1q_u8(input_top_ptr + input.offset()); - const uint8x16_t mid_data = vld1q_u8(input_mid_ptr + input.offset()); - const uint8x16_t bot_data = vld1q_u8(input_bot_ptr + input.offset()); - - uint8x8_t p0 = vget_low_u8(top_data); - uint8x8_t p1 = vext_u8(vget_low_u8(top_data), vget_high_u8(top_data), 1); - uint8x8_t p2 = vext_u8(vget_low_u8(top_data), vget_high_u8(top_data), 2); - uint8x8_t p3 = vget_low_u8(mid_data); - uint8x8_t p4 = vext_u8(vget_low_u8(mid_data), vget_high_u8(mid_data), 1); - uint8x8_t p5 = vext_u8(vget_low_u8(mid_data), vget_high_u8(mid_data), 2); - uint8x8_t p6 = vget_low_u8(bot_data); - uint8x8_t p7 = vext_u8(vget_low_u8(bot_data), vget_high_u8(bot_data), 1); - uint8x8_t p8 = vext_u8(vget_low_u8(bot_data), vget_high_u8(bot_data), 2); - - sort9(p0, p1, p2, p3, p4, p5, p6, p7, p8); - - vst1_u8(output.ptr(), p4); - }, - input, output); -} -template <> -void NENonLinearFilterKernel::median_filter_box<5, 5>(const Window &win) -{ - Iterator input(_input, win); - Iterator output(_output, win); - - const auto input_top2_ptr = static_cast(_input->ptr_to_element(Coordinates(-2, -2))); - const auto input_top_ptr = static_cast(_input->ptr_to_element(Coordinates(-2, -1))); - const auto input_mid_ptr = static_cast(_input->ptr_to_element(Coordinates(-2, 0))); - const auto input_bot_ptr = static_cast(_input->ptr_to_element(Coordinates(-2, 1))); - const auto input_bot2_ptr = static_cast(_input->ptr_to_element(Coordinates(-2, 2))); - - execute_window_loop(win, [&](const Coordinates &) - { - const uint8x16_t top2_data = vld1q_u8(input_top2_ptr + input.offset()); - const uint8x16_t top_data = vld1q_u8(input_top_ptr + input.offset()); - const uint8x16_t mid_data = vld1q_u8(input_mid_ptr + input.offset()); - const uint8x16_t bot_data = vld1q_u8(input_bot_ptr + input.offset()); - const uint8x16_t bot2_data = vld1q_u8(input_bot2_ptr + input.offset()); - - const std::array d = - { - vget_low_u8(top2_data), - vget_high_u8(top2_data), - vget_low_u8(top_data), - vget_high_u8(top_data), - vget_low_u8(mid_data), - vget_high_u8(mid_data), - vget_low_u8(bot_data), - vget_high_u8(bot_data), - vget_low_u8(bot2_data), - vget_high_u8(bot2_data) - }; - - std::array p{ 0 }; - for(unsigned int i = 0; i < 5; ++i) - { - const unsigned int idx_d = i * 2; - const unsigned int idx_p = i * 5; - - p[idx_p] = d[idx_d]; - p[idx_p + 1] = vext_u8(d[idx_d], d[idx_d + 1], 1); - p[idx_p + 2] = vext_u8(d[idx_d], d[idx_d + 1], 2); - p[idx_p + 3] = vext_u8(d[idx_d], d[idx_d + 1], 3); - p[idx_p + 4] = vext_u8(d[idx_d], d[idx_d + 1], 4); - } - - sort25(p); - - vst1_u8(output.ptr(), p[12]); - }, - input, output); -} - -template -void NENonLinearFilterKernel::min_filter_box(const Window &win) -{ - static_assert(mask_w > 0, "Mask size must not be 0"); - static_assert(mask_h > 0, "Mask size must not be 0"); - - Iterator input(_input, win); - Iterator output(_output, win); - - const int k_row_half = mask_h / 2; - const int k_col_half = mask_w / 2; - - // Set row pointers - std::array input_ptrs{ {} }; - for(int i = -k_row_half; i <= k_row_half; ++i) - { - input_ptrs[k_row_half + i] = _input->buffer() + _input->info()->offset_element_in_bytes(Coordinates(-k_col_half, i)); - } - - execute_window_loop(win, [&](const Coordinates &) - { - // Get min of rows - uint8x16_t rows_min = vld1q_u8(input_ptrs[0] + input.offset()); - - for(unsigned int r = 1; r < mask_h; ++r) - { - const uint8x16_t data = vld1q_u8(input_ptrs[r] + input.offset()); - rows_min = vminq_u8(rows_min, data); - } - - const uint8x8_t out = min_row(rows_min); - - // Store result as U8 - vst1_u8(output.ptr(), out); - }, - input, output); -} - -template -void NENonLinearFilterKernel::max_filter_box(const Window &win) -{ - static_assert(mask_w > 0, "Mask size must not be 0"); - static_assert(mask_h > 0, "Mask size must not be 0"); - ARM_COMPUTE_ERROR_ON(_input->buffer() == nullptr); - - Iterator input(_input, win); - Iterator output(_output, win); - - const int k_row_half = mask_h / 2; - const int k_col_half = mask_w / 2; - - // Set row pointers - std::array input_ptrs{ {} }; - for(int i = -k_row_half; i <= k_row_half; ++i) - { - input_ptrs[k_row_half + i] = _input->buffer() + _input->info()->offset_element_in_bytes(Coordinates(-k_col_half, i)); - } - - execute_window_loop(win, [&](const Coordinates &) - { - uint8x16_t rows_max = vld1q_u8(input_ptrs[0] + input.offset()); - - // Get max of rows - for(unsigned int r = 1; r < mask_h; ++r) - { - const uint8x16_t data = vld1q_u8(input_ptrs[r] + input.offset()); - rows_max = vmaxq_u8(rows_max, data); - } - - // Get max of columns - const uint8x8_t out = max_row(rows_max); - - // Store result as U8 - vst1_u8(output.ptr(), out); - }, - input, output); -} - -template <> -void NENonLinearFilterKernel::median_filter_cross<3, 3>(const Window &win) -{ - Iterator input(_input, win); - Iterator output(_output, win); - - const auto input_top_ptr = static_cast(_input->ptr_to_element(Coordinates(0, -1))); - const auto input_mid_ptr = static_cast(_input->ptr_to_element(Coordinates(-1, 0))); - const auto input_bot_ptr = static_cast(_input->ptr_to_element(Coordinates(0, 1))); - - execute_window_loop(win, [&](const Coordinates &) - { - const uint8x8_t top_data = vld1_u8(input_top_ptr + input.offset()); - const uint8x16_t mid_data = vld1q_u8(input_mid_ptr + input.offset()); - const uint8x8_t bot_data = vld1_u8(input_bot_ptr + input.offset()); - - uint8x8_t p0 = top_data; - uint8x8_t p1 = vget_low_u8(mid_data); - uint8x8_t p2 = vext_u8(vget_low_u8(mid_data), vget_high_u8(mid_data), 1); - uint8x8_t p3 = vext_u8(vget_low_u8(mid_data), vget_high_u8(mid_data), 2); - uint8x8_t p4 = bot_data; - - sort5(p0, p1, p2, p3, p4); - - vst1_u8(output.ptr(), p2); - }, - input, output); -} - -template <> -void NENonLinearFilterKernel::median_filter_cross<5, 5>(const Window &win) -{ - Iterator input(_input, win); - Iterator output(_output, win); - - const auto input_top2_ptr = static_cast(_input->ptr_to_element(Coordinates(0, -2))); - const auto input_top_ptr = static_cast(_input->ptr_to_element(Coordinates(0, -1))); - const auto input_mid_ptr = static_cast(_input->ptr_to_element(Coordinates(-2, 0))); - const auto input_bot_ptr = static_cast(_input->ptr_to_element(Coordinates(0, 1))); - const auto input_bot2_ptr = static_cast(_input->ptr_to_element(Coordinates(0, 2))); - - execute_window_loop(win, [&](const Coordinates &) - { - const uint8x8_t top2_data = vld1_u8(input_top2_ptr + input.offset()); - const uint8x8_t top_data = vld1_u8(input_top_ptr + input.offset()); - const uint8x16_t mid_data = vld1q_u8(input_mid_ptr + input.offset()); - const uint8x8_t bot_data = vld1_u8(input_bot_ptr + input.offset()); - const uint8x8_t bot2_data = vld1_u8(input_bot2_ptr + input.offset()); - - uint8x8_t p0 = top2_data; - uint8x8_t p1 = top_data; - uint8x8_t p2 = vget_low_u8(mid_data); - uint8x8_t p3 = vext_u8(vget_low_u8(mid_data), vget_high_u8(mid_data), 1); - uint8x8_t p4 = vext_u8(vget_low_u8(mid_data), vget_high_u8(mid_data), 2); - uint8x8_t p5 = vext_u8(vget_low_u8(mid_data), vget_high_u8(mid_data), 3); - uint8x8_t p6 = vext_u8(vget_low_u8(mid_data), vget_high_u8(mid_data), 4); - uint8x8_t p7 = bot_data; - uint8x8_t p8 = bot2_data; - - sort9(p0, p1, p2, p3, p4, p5, p6, p7, p8); - - vst1_u8(output.ptr(), p4); - }, - input, output); -} - -template -void NENonLinearFilterKernel::min_filter_cross(const Window &win) -{ - static_assert(mask_w > 0, "Mask size must not be 0"); - static_assert(mask_h > 0, "Mask size must not be 0"); - ARM_COMPUTE_ERROR_ON(_input->buffer() == nullptr); - - Iterator input(_input, win); - Iterator output(_output, win); - - const int k_row_half = mask_h / 2; - const int k_col_half = mask_w / 2; - - const unsigned char *mid_ptr = _input->buffer() + _input->info()->offset_element_in_bytes(Coordinates(-k_col_half, 0)); - - // Set row pointers - std::array input_ptrs{ {} }; - for(int i = -k_row_half; i <= k_row_half; ++i) - { - input_ptrs[k_row_half + i] = _input->buffer() + _input->info()->offset_element_in_bytes(Coordinates(0, i)); - } - - execute_window_loop(win, [&](const Coordinates &) - { - uint8x8_t rows_min = vld1_u8(input_ptrs[0] + input.offset()); - - // Get min of rows - for(unsigned int r = 1; r < mask_h; ++r) - { - const uint8x8_t data = vld1_u8(input_ptrs[r] + input.offset()); - rows_min = vmin_u8(rows_min, data); - } - - // Get min of middle row - const uint8x16_t data = vld1q_u8(mid_ptr + input.offset()); - uint8x8_t out = min_row(data); - - // Get final min - out = vmin_u8(out, rows_min); - - // Store result as U8 - vst1_u8(output.ptr(), out); - }, - input, output); -} - -template -void NENonLinearFilterKernel::max_filter_cross(const Window &win) -{ - static_assert(mask_w > 0, "Mask size must not be 0"); - static_assert(mask_h > 0, "Mask size must not be 0"); - ARM_COMPUTE_ERROR_ON(_input->buffer() == nullptr); - - Iterator input(_input, win); - Iterator output(_output, win); - - const int k_row_half = mask_h / 2; - const int k_col_half = mask_w / 2; - - const unsigned char *mid_ptr = _input->buffer() + _input->info()->offset_element_in_bytes(Coordinates(-k_col_half, 0)); - - // Set row pointers - std::array input_ptrs{ {} }; - for(int i = -k_row_half; i <= k_row_half; ++i) - { - input_ptrs[k_row_half + i] = _input->buffer() + _input->info()->offset_element_in_bytes(Coordinates(0, i)); - } - - execute_window_loop(win, [&](const Coordinates &) - { - uint8x8_t rows_max = vld1_u8(input_ptrs[0] + input.offset()); - - // Get max of rows - for(unsigned int r = 1; r < mask_h; ++r) - { - const uint8x8_t data = vld1_u8(input_ptrs[r] + input.offset()); - rows_max = vmax_u8(rows_max, data); - } - - // Get max of middle row - const uint8x16_t data = vld1q_u8(mid_ptr + input.offset()); - uint8x8_t out = max_row(data); - - // Get final max - out = vmax_u8(out, rows_max); - - // Store result as U8 - vst1_u8(output.ptr(), out); - }, - input, output); -} - -template <> -void NENonLinearFilterKernel::median_filter_disk<5, 5>(const Window &win) -{ - Iterator input(_input, win); - Iterator output(_output, win); - - static const uint8x16_t zero = vdupq_n_u8(0); - const auto input_top2_ptr = static_cast(_input->ptr_to_element(Coordinates(-2, -2))); - const auto input_top_ptr = static_cast(_input->ptr_to_element(Coordinates(-2, -1))); - const auto input_mid_ptr = static_cast(_input->ptr_to_element(Coordinates(-2, 0))); - const auto input_bot_ptr = static_cast(_input->ptr_to_element(Coordinates(-2, 1))); - const auto input_bot2_ptr = static_cast(_input->ptr_to_element(Coordinates(-2, 2))); - - execute_window_loop(win, [&](const Coordinates &) - { - const uint8x16_t top2_data = vextq_u8(vld1q_u8(input_top2_ptr + input.offset()), zero, 1); - const uint8x16_t top_data = vld1q_u8(input_top_ptr + input.offset()); - const uint8x16_t mid_data = vld1q_u8(input_mid_ptr + input.offset()); - const uint8x16_t bot_data = vld1q_u8(input_bot_ptr + input.offset()); - const uint8x16_t bot2_data = vextq_u8(vld1q_u8(input_bot2_ptr + input.offset()), zero, 1); - - std::array d = - { - vget_low_u8(top2_data), - vget_high_u8(top2_data), - vget_low_u8(top_data), - vget_high_u8(top_data), - vget_low_u8(mid_data), - vget_high_u8(mid_data), - vget_low_u8(bot_data), - vget_high_u8(bot_data), - vget_low_u8(bot2_data), - vget_high_u8(bot2_data) - }; - - std::array p{ 0 }; - p[0] = d[0]; - p[1] = vext_u8(d[0], d[1], 1); - p[2] = vext_u8(d[0], d[1], 2); - p[18] = d[8]; - p[19] = vext_u8(d[8], d[9], 1); - p[20] = vext_u8(d[8], d[9], 2); - - for(unsigned int i = 0; i < 3; ++i) - { - const unsigned int idx_d = 2 + i * 2; - const unsigned int idx_p = 3 + i * 5; - - p[idx_p] = d[idx_d]; - p[idx_p + 1] = vext_u8(d[idx_d], d[idx_d + 1], 1); - p[idx_p + 2] = vext_u8(d[idx_d], d[idx_d + 1], 2); - p[idx_p + 3] = vext_u8(d[idx_d], d[idx_d + 1], 3); - p[idx_p + 4] = vext_u8(d[idx_d], d[idx_d + 1], 4); - } - - sort21(p); - - vst1_u8(output.ptr(), p[10]); - }, - input, output); -} - -template <> -void NENonLinearFilterKernel::min_filter_disk<5, 5>(const Window &win) -{ - Iterator input(_input, win); - Iterator output(_output, win); - - static const uint8x16_t zero = vdupq_n_u8(0); - const auto input_top2_ptr = static_cast(_input->ptr_to_element(Coordinates(-2, -2))); - const auto input_top_ptr = static_cast(_input->ptr_to_element(Coordinates(-2, -1))); - const auto input_mid_ptr = static_cast(_input->ptr_to_element(Coordinates(-2, 0))); - const auto input_bot_ptr = static_cast(_input->ptr_to_element(Coordinates(-2, 1))); - const auto input_bot2_ptr = static_cast(_input->ptr_to_element(Coordinates(-2, 2))); - - execute_window_loop(win, [&](const Coordinates &) - { - const uint8x16_t top2_data = vextq_u8(vld1q_u8(input_top2_ptr + input.offset()), zero, 1); - const uint8x16_t top_data = vld1q_u8(input_top_ptr + input.offset()); - const uint8x16_t mid_data = vld1q_u8(input_mid_ptr + input.offset()); - const uint8x16_t bot_data = vld1q_u8(input_bot_ptr + input.offset()); - const uint8x16_t bot2_data = vextq_u8(vld1q_u8(input_bot2_ptr + input.offset()), zero, 1); - - const uint8x16_t rows_min_3 = vminq_u8(top2_data, bot2_data); - uint8x16_t rows_min_5 = vminq_u8(top_data, bot_data); - rows_min_5 = vminq_u8(rows_min_5, mid_data); - - const uint8x8_t out_3 = min_row<3>(rows_min_3); - const uint8x8_t out_5 = min_row<5>(rows_min_5); - - vst1_u8(output.ptr(), vmin_u8(out_3, out_5)); - }, - input, output); -} - -template <> -void NENonLinearFilterKernel::max_filter_disk<5, 5>(const Window &win) -{ - Iterator input(_input, win); - Iterator output(_output, win); - - static const uint8x16_t zero = vdupq_n_u8(0); - const auto input_top2_ptr = static_cast(_input->ptr_to_element(Coordinates(-2, -2))); - const auto input_top_ptr = static_cast(_input->ptr_to_element(Coordinates(-2, -1))); - const auto input_mid_ptr = static_cast(_input->ptr_to_element(Coordinates(-2, 0))); - const auto input_bot_ptr = static_cast(_input->ptr_to_element(Coordinates(-2, 1))); - const auto input_bot2_ptr = static_cast(_input->ptr_to_element(Coordinates(-2, 2))); - - execute_window_loop(win, [&](const Coordinates &) - { - const uint8x16_t top2_data = vextq_u8(vld1q_u8(input_top2_ptr + input.offset()), zero, 1); - const uint8x16_t top_data = vld1q_u8(input_top_ptr + input.offset()); - const uint8x16_t mid_data = vld1q_u8(input_mid_ptr + input.offset()); - const uint8x16_t bot_data = vld1q_u8(input_bot_ptr + input.offset()); - const uint8x16_t bot2_data = vextq_u8(vld1q_u8(input_bot2_ptr + input.offset()), zero, 1); - - const uint8x16_t rows_max_3 = vmaxq_u8(top2_data, bot2_data); - uint8x16_t rows_max_5 = vmaxq_u8(top_data, bot_data); - rows_max_5 = vmaxq_u8(rows_max_5, mid_data); - - const uint8x8_t out_3 = max_row<3>(rows_max_3); - const uint8x8_t out_5 = max_row<5>(rows_max_5); - - vst1_u8(output.ptr(), vmax_u8(out_3, out_5)); - }, - input, output); -} - -template -void NENonLinearFilterKernel::non_linear_filter_generic(const Window &win) -{ - Iterator input(_input, win); - Iterator output(_output, win); - ARM_COMPUTE_ERROR_ON(_input->buffer() == nullptr); - - const int k_row_half = mask_h / 2; - const int k_col_half = mask_w / 2; - constexpr int mask_size = mask_w * mask_h; - - // Set row pointers - std::array input_ptrs{ {} }; - for(int i = -k_row_half; i <= k_row_half; ++i) - { - input_ptrs[k_row_half + i] = _input->buffer() + _input->info()->offset_element_in_bytes(Coordinates(-k_col_half, i)); - } - - std::array vals{ {} }; - - execute_window_loop(win, [&](const Coordinates &) - { - // Clear array - std::fill(std::begin(vals), std::end(vals), 0); - - size_t v = 0; - size_t m = 0; - - for(unsigned int r = 0; r < mask_h; ++r) - { - const auto in_ptr = static_cast(input_ptrs[r] + input.offset()); - - for(unsigned int c = 0; c < mask_w; ++c, ++m) - { - if(_mask[m] == 255) - { - vals[v] = in_ptr[c]; - ++v; - } - } - } - - // Only do something if there is at least one non-zero element in the - // mask - if(v > 0) - { - std::sort(vals.begin(), vals.begin() + v); - - switch(_function) - { - case NonLinearFilterFunction::MIN: - *output.ptr() = vals[0]; - break; - case NonLinearFilterFunction::MAX: - *output.ptr() = vals[v - 1]; - break; - case NonLinearFilterFunction::MEDIAN: - *output.ptr() = vals[v / 2]; - break; - default: - break; - } - } - }, - input, output); -} - -void NENonLinearFilterKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - - using NonLinearFilterFunction = void (NENonLinearFilterKernel::*)(const Window & window); - - // Function table for BOX pattern - static const std::array func_table_box = - { - { - &NENonLinearFilterKernel::median_filter_box<3, 3>, - &NENonLinearFilterKernel::min_filter_box<3, 3>, - &NENonLinearFilterKernel::max_filter_box<3, 3>, - &NENonLinearFilterKernel::median_filter_box<5, 5>, - &NENonLinearFilterKernel::min_filter_box<5, 5>, - &NENonLinearFilterKernel::max_filter_box<5, 5>, - } - }; - - // Function table for CROSS pattern - static const std::array func_table_cross = - { - { - &NENonLinearFilterKernel::median_filter_cross<3, 3>, - &NENonLinearFilterKernel::min_filter_cross<3, 3>, - &NENonLinearFilterKernel::max_filter_cross<3, 3>, - &NENonLinearFilterKernel::median_filter_cross<5, 5>, - &NENonLinearFilterKernel::min_filter_cross<5, 5>, - &NENonLinearFilterKernel::max_filter_cross<5, 5>, - } - }; - - // Function table for DISK pattern - static const std::array func_table_disk = - { - { - &NENonLinearFilterKernel::median_filter_box<3, 3>, - &NENonLinearFilterKernel::min_filter_box<3, 3>, - &NENonLinearFilterKernel::max_filter_box<3, 3>, - &NENonLinearFilterKernel::median_filter_disk<5, 5>, - &NENonLinearFilterKernel::min_filter_disk<5, 5>, - &NENonLinearFilterKernel::max_filter_disk<5, 5>, - } - }; - - // Function table for OTHER pattern - static const std::array func_table_generic = - { - { - &NENonLinearFilterKernel::non_linear_filter_generic<3, 3>, - &NENonLinearFilterKernel::non_linear_filter_generic<5, 5>, - } - }; - - switch(_pattern) - { - case MatrixPattern::BOX: - ARM_COMPUTE_ERROR_ON(_func_idx >= func_table_box.size()); - (this->*func_table_box[_func_idx])(window); - break; - case MatrixPattern::CROSS: - ARM_COMPUTE_ERROR_ON(_func_idx >= func_table_cross.size()); - (this->*func_table_cross[_func_idx])(window); - break; - case MatrixPattern::DISK: - ARM_COMPUTE_ERROR_ON(_func_idx >= func_table_disk.size()); - (this->*func_table_disk[_func_idx])(window); - break; - case MatrixPattern::OTHER: - default: - ARM_COMPUTE_ERROR_ON(_func_idx >= func_table_generic.size()); - (this->*func_table_generic[_func_idx])(window); - break; - } -} -} // namespace arm_compute diff --git a/src/core/NEON/kernels/NENonLinearFilterKernel.h b/src/core/NEON/kernels/NENonLinearFilterKernel.h deleted file mode 100644 index 3cef12e8ec..0000000000 --- a/src/core/NEON/kernels/NENonLinearFilterKernel.h +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NENONLINEARFILTERKERNEL_H -#define ARM_COMPUTE_NENONLINEARFILTERKERNEL_H - -#include "arm_compute/core/Types.h" -#include "src/core/NEON/INEKernel.h" - -#include - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to apply a non-linear filter */ -class NENonLinearFilterKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NENonLinearFilterKernel"; - } - /** Default constructor */ - NENonLinearFilterKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NENonLinearFilterKernel(NENonLinearFilterKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NENonLinearFilterKernel &operator=(NENonLinearFilterKernel &) = delete; - /** Allow instances of this class to be moved */ - NENonLinearFilterKernel(NENonLinearFilterKernel &&) = default; - /** Allow instances of this class to be moved */ - NENonLinearFilterKernel &operator=(NENonLinearFilterKernel &&) = default; - /** Default destructor */ - ~NENonLinearFilterKernel() = default; - /** Set the source, destination and border mode of the kernel - * - * @param[in] input Source tensor. Data type supported: U8 - * @param[out] output Destination tensor. Data type supported: U8 - * @param[in] function Non linear function to perform - * @param[in] mask_size Mask size. Supported sizes: 3, 5 - * @param[in] pattern Mask pattern - * @param[in] mask The given mask. Will be used only if pattern is specified to PATTERN_OTHER - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input, ITensor *output, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - /** Fill mask with the corresponding given pattern. - * - * @param[in,out] mask Mask to be filled according to pattern - * @param[in] cols Columns (width) of mask - * @param[in] rows Rows (height) of mask - * @param[in] pattern Pattern to fill the mask according to - */ - void fill_mask(uint8_t *mask, int cols, int rows, MatrixPattern pattern); - /** Apply a median filter when given mask pattern is defined as box. - * - * @param[in] win Window to apply the filter on. - */ - template - void median_filter_box(const Window &win); - /** Apply a min filter when given mask pattern is defined as box. - * - * @param[in] win Window to apply the filter on. - */ - template - void min_filter_box(const Window &win); - /** Apply a max filter when given mask pattern is defined as box. - * - * @param[in] win Window to apply the filter on. - */ - template - void max_filter_box(const Window &win); - /** Apply a median filter when given mask pattern is defined as cross. - * - * @param[in] win Window to apply the filter on. - */ - template - void median_filter_cross(const Window &win); - /** Apply a min filter when given mask pattern is defined as cross. - * - * @param[in] win Window to apply the filter on. - */ - template - void min_filter_cross(const Window &win); - /** Apply a max filter when given mask pattern is defined as cross. - * - * @param[in] win Window to apply the filter on. - */ - template - void max_filter_cross(const Window &win); - /** Apply a median filter when given mask pattern is defined as disk. - * - * @param[in] win Window to apply the filter on. - */ - template - void median_filter_disk(const Window &win); - /** Apply a min filter when given mask pattern is defined as disk. - * - * @param[in] win Window to apply the filter on. - */ - template - void min_filter_disk(const Window &win); - /** Apply a max filter when given mask pattern is defined as disk. - * - * @param[in] win Window to apply the filter on. - */ - template - void max_filter_disk(const Window &win); - /** Apply a non-linear filter when given mask has user-defined pattern. - * - * @param[in] win Window to apply the filter on. - */ - template - void non_linear_filter_generic(const Window &win); - -private: - unsigned int _border_width; - const ITensor *_input; - ITensor *_output; - const uint8_t *_mask; - MatrixPattern _pattern; - NonLinearFilterFunction _function; - unsigned int _func_idx; - BorderSize _border_size; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NENONLINEARFILTERKERNEL_H */ diff --git a/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h b/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h index 7b0bc0c720..4194dac68e 100644 --- a/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h +++ b/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h @@ -34,7 +34,6 @@ class ITensor; /** Interface to perform Non-Maxima suppression over a 3x3 window using Neon * - * @note Used by @ref NEFastCorners and @ref NEHarrisCorners */ class NENonMaximaSuppression3x3Kernel : public INEKernel { diff --git a/src/core/NEON/kernels/NERemapKernel.cpp b/src/core/NEON/kernels/NERemapKernel.cpp deleted file mode 100644 index b334a11227..0000000000 --- a/src/core/NEON/kernels/NERemapKernel.cpp +++ /dev/null @@ -1,237 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/NEON/kernels/NERemapKernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" -#include "src/core/AccessWindowStatic.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/ScaleHelpers.h" -#include "src/core/helpers/WindowHelpers.h" - -#include -#include -#include - -using namespace arm_compute; - -namespace arm_compute -{ -class Coordinates; -} // namespace arm_compute - -namespace -{ -inline int32x4_t offset_nearest_interpolation(const float *mapx_ptr, const float *mapy_ptr, const float32x4_t &width, const float32x4_t &height, const int32x4_t &stride) -{ - const float32x4_t lowerxy = vdupq_n_f32(-1.f); - - float32x4_t x = vld1q_f32(mapx_ptr); - float32x4_t y = vld1q_f32(mapy_ptr); - - // Clamp x coordinates - x = vmaxq_f32(lowerxy, vminq_f32(x, width)); - y = vmaxq_f32(lowerxy, vminq_f32(y, height)); - - const int32x4_t x_s32 = vcvtq_s32_f32(x); - const int32x4_t y_s32 = vcvtq_s32_f32(y); - - return vmlaq_s32(x_s32, y_s32, stride); -} - -} // namespace - -NERemapKernel::NERemapKernel() - : _func(nullptr), _input(nullptr), _output(nullptr), _map_x(nullptr), _map_y(nullptr) -{ -} - -BorderSize NERemapKernel::border_size() const -{ - return BorderSize(1); -} - -void NERemapKernel::configure(const ITensor *input, const ITensor *map_x, const ITensor *map_y, ITensor *output, InterpolationPolicy policy) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(map_x, 1, DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(map_y, 1, DataType::F32); - - _input = input; - _output = output; - _map_x = map_x; - _map_y = map_y; - - switch(policy) - { - case InterpolationPolicy::NEAREST_NEIGHBOR: - { - _func = &NERemapKernel::remap_nearest; - break; - } - case InterpolationPolicy::BILINEAR: - { - _func = &NERemapKernel::remap_bilinear; - break; - } - default: - ARM_COMPUTE_ERROR("Unsupported interpolation mode"); - break; - } - - constexpr unsigned int num_elems_processed_per_iteration = 16; - - // Configure kernel window - Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration)); - - const int total_right = ceil_to_multiple(input->info()->dimension(0), num_elems_processed_per_iteration); - const int access_right = total_right + (((total_right - input->info()->dimension(0)) == 0) ? border_size().right : 0); - - AccessWindowStatic input_access(input->info(), -border_size().left, -border_size().top, access_right, input->info()->dimension(1) + border_size().bottom); - - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal mapx_access(map_x->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal mapy_access(map_y->info(), 0, num_elems_processed_per_iteration); - - update_window_and_padding(win, input_access, mapx_access, mapy_access, output_access); - - output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape())); - - INEKernel::configure(win); -} - -void NERemapKernel::remap_nearest(const Window &window) -{ - // Don't increment in X and Y direction for the input tensor - // A pointer to the start of this plane is needed as base for the precomputed offsets - Window win_in(window); - win_in.set(Window::DimX, Window::Dimension(0, 0, 0)); - win_in.set(Window::DimY, Window::Dimension(0, 0, 0)); - - Iterator in(_input, win_in); - Iterator out(_output, window); - Iterator mapx(_map_x, window); - Iterator mapy(_map_y, window); - - const float32x4_t width = vdupq_n_f32(static_cast(_input->info()->dimension(0))); - const float32x4_t height = vdupq_n_f32(static_cast(_input->info()->dimension(1))); - const int32x4_t in_stride = vdupq_n_s32(static_cast(_input->info()->strides_in_bytes()[1])); - - execute_window_loop(window, [&](const Coordinates &) - { - const auto mapx_ptr = reinterpret_cast(mapx.ptr()); - const auto mapy_ptr = reinterpret_cast(mapy.ptr()); - const uint8_t *in_ptr = in.ptr(); - - const int32x4_t offset0 = offset_nearest_interpolation(mapx_ptr + 0, mapy_ptr + 0, width, height, in_stride); - const int32x4_t offset1 = offset_nearest_interpolation(mapx_ptr + 4, mapy_ptr + 4, width, height, in_stride); - const int32x4_t offset2 = offset_nearest_interpolation(mapx_ptr + 8, mapy_ptr + 8, width, height, in_stride); - const int32x4_t offset3 = offset_nearest_interpolation(mapx_ptr + 12, mapy_ptr + 12, width, height, in_stride); - - uint8x16_t tmp = vdupq_n_u8(0); - tmp = vsetq_lane_u8(in_ptr[vgetq_lane_s32(offset0, 0)], tmp, 0); - tmp = vsetq_lane_u8(in_ptr[vgetq_lane_s32(offset0, 1)], tmp, 1); - tmp = vsetq_lane_u8(in_ptr[vgetq_lane_s32(offset0, 2)], tmp, 2); - tmp = vsetq_lane_u8(in_ptr[vgetq_lane_s32(offset0, 3)], tmp, 3); - tmp = vsetq_lane_u8(in_ptr[vgetq_lane_s32(offset1, 0)], tmp, 4); - tmp = vsetq_lane_u8(in_ptr[vgetq_lane_s32(offset1, 1)], tmp, 5); - tmp = vsetq_lane_u8(in_ptr[vgetq_lane_s32(offset1, 2)], tmp, 6); - tmp = vsetq_lane_u8(in_ptr[vgetq_lane_s32(offset1, 3)], tmp, 7); - tmp = vsetq_lane_u8(in_ptr[vgetq_lane_s32(offset2, 0)], tmp, 8); - tmp = vsetq_lane_u8(in_ptr[vgetq_lane_s32(offset2, 1)], tmp, 9); - tmp = vsetq_lane_u8(in_ptr[vgetq_lane_s32(offset2, 2)], tmp, 10); - tmp = vsetq_lane_u8(in_ptr[vgetq_lane_s32(offset2, 3)], tmp, 11); - tmp = vsetq_lane_u8(in_ptr[vgetq_lane_s32(offset3, 0)], tmp, 12); - tmp = vsetq_lane_u8(in_ptr[vgetq_lane_s32(offset3, 1)], tmp, 13); - tmp = vsetq_lane_u8(in_ptr[vgetq_lane_s32(offset3, 2)], tmp, 14); - tmp = vsetq_lane_u8(in_ptr[vgetq_lane_s32(offset3, 3)], tmp, 15); - vst1q_u8(out.ptr(), tmp); - }, - in, out, mapx, mapy); -} - -void NERemapKernel::remap_bilinear(const Window &window) -{ - using namespace scale_helpers; - - // Don't increment in X and Y direction for the input tensor - // A pointer to the start of this plane is needed as base for the precomputed offsets - Window win_in(window); - win_in.set(Window::DimX, Window::Dimension(0, 0, 0)); - win_in.set(Window::DimY, Window::Dimension(0, 0, 0)); - - Iterator in(_input, win_in); - Iterator out(_output, window); - Iterator mapx(_map_x, window); - Iterator mapy(_map_y, window); - - const size_t width = _input->info()->dimension(0); - const size_t height = _input->info()->dimension(1); - const size_t in_stride = _input->info()->strides_in_bytes()[1]; - - execute_window_loop(window, [&](const Coordinates &) - { - const auto mapx_ptr = reinterpret_cast(mapx.ptr()); - const auto mapy_ptr = reinterpret_cast(mapy.ptr()); - const uint8_t *in_ptr = in.ptr(); - - uint8x8_t tmp0 = vdup_n_u8(0); - tmp0 = vset_lane_u8(pixel_bilinear_c1_clamp(in_ptr, in_stride, width, height, mapx_ptr[0], mapy_ptr[0]), tmp0, 0); - tmp0 = vset_lane_u8(pixel_bilinear_c1_clamp(in_ptr, in_stride, width, height, mapx_ptr[1], mapy_ptr[1]), tmp0, 1); - tmp0 = vset_lane_u8(pixel_bilinear_c1_clamp(in_ptr, in_stride, width, height, mapx_ptr[2], mapy_ptr[2]), tmp0, 2); - tmp0 = vset_lane_u8(pixel_bilinear_c1_clamp(in_ptr, in_stride, width, height, mapx_ptr[3], mapy_ptr[3]), tmp0, 3); - tmp0 = vset_lane_u8(pixel_bilinear_c1_clamp(in_ptr, in_stride, width, height, mapx_ptr[4], mapy_ptr[4]), tmp0, 4); - tmp0 = vset_lane_u8(pixel_bilinear_c1_clamp(in_ptr, in_stride, width, height, mapx_ptr[5], mapy_ptr[5]), tmp0, 5); - tmp0 = vset_lane_u8(pixel_bilinear_c1_clamp(in_ptr, in_stride, width, height, mapx_ptr[6], mapy_ptr[6]), tmp0, 6); - tmp0 = vset_lane_u8(pixel_bilinear_c1_clamp(in_ptr, in_stride, width, height, mapx_ptr[7], mapy_ptr[7]), tmp0, 7); - - uint8x8_t tmp1 = vdup_n_u8(0); - tmp1 = vset_lane_u8(pixel_bilinear_c1_clamp(in_ptr, in_stride, width, height, mapx_ptr[8], mapy_ptr[8]), tmp1, 0); - tmp1 = vset_lane_u8(pixel_bilinear_c1_clamp(in_ptr, in_stride, width, height, mapx_ptr[9], mapy_ptr[9]), tmp1, 1); - tmp1 = vset_lane_u8(pixel_bilinear_c1_clamp(in_ptr, in_stride, width, height, mapx_ptr[10], mapy_ptr[10]), tmp1, 2); - tmp1 = vset_lane_u8(pixel_bilinear_c1_clamp(in_ptr, in_stride, width, height, mapx_ptr[11], mapy_ptr[11]), tmp1, 3); - tmp1 = vset_lane_u8(pixel_bilinear_c1_clamp(in_ptr, in_stride, width, height, mapx_ptr[12], mapy_ptr[12]), tmp1, 4); - tmp1 = vset_lane_u8(pixel_bilinear_c1_clamp(in_ptr, in_stride, width, height, mapx_ptr[13], mapy_ptr[13]), tmp1, 5); - tmp1 = vset_lane_u8(pixel_bilinear_c1_clamp(in_ptr, in_stride, width, height, mapx_ptr[14], mapy_ptr[14]), tmp1, 6); - tmp1 = vset_lane_u8(pixel_bilinear_c1_clamp(in_ptr, in_stride, width, height, mapx_ptr[15], mapy_ptr[15]), tmp1, 7); - - vst1q_u8(out.ptr(), vcombine_u8(tmp0, tmp1)); - }, - in, out, mapx, mapy); -} - -void NERemapKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - ARM_COMPUTE_ERROR_ON(_func == nullptr); - - (this->*_func)(window); -} diff --git a/src/core/NEON/kernels/NERemapKernel.h b/src/core/NEON/kernels/NERemapKernel.h deleted file mode 100644 index adc7f4bdd5..0000000000 --- a/src/core/NEON/kernels/NERemapKernel.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2016-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEREMAPKERNEL_H -#define ARM_COMPUTE_NEREMAPKERNEL_H - -#include "arm_compute/core/Types.h" -#include "src/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Neon kernel to perform a remap on a tensor */ -class NERemapKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NERemapKernel"; - } - /** Default constructor */ - NERemapKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NERemapKernel(const NERemapKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NERemapKernel &operator=(const NERemapKernel &) = delete; - /** Allow instances of this class to be moved */ - NERemapKernel(NERemapKernel &&) = default; - /** Allow instances of this class to be moved */ - NERemapKernel &operator=(NERemapKernel &&) = default; - /** Default destructor */ - ~NERemapKernel() = default; - - /** Initialize the kernel's input, output and border mode. - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[in] map_x Map for X coordinates. Data type supported: F32. - * @param[in] map_y Map for Y coordinates. Data type supported: F32. - * @param[out] output Destination tensor. Data types supported: U8. All but the lowest two dimensions must be the same size as in the input tensor, i.e. remapping is only performed within the XY-plane. - * @param[in] policy The interpolation type. - */ - void configure(const ITensor *input, const ITensor *map_x, const ITensor *map_y, ITensor *output, InterpolationPolicy policy); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - /** function to perform nearest interpolation on the given window */ - void remap_nearest(const Window &window); - /** function to perform bilinear interpolation on the given window */ - void remap_bilinear(const Window &window); - /** Remap function to use for the particular interpolation type passed to configure() */ - void (NERemapKernel::*_func)(const Window &window); - - const ITensor *_input; /**< Input image */ - ITensor *_output; /**< Output image */ - const ITensor *_map_x; /**< Input remap x coordinates */ - const ITensor *_map_y; /**< Input remap y coordinates */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEREMAPKERNEL_H */ diff --git a/src/core/NEON/kernels/NEScharr3x3Kernel.cpp b/src/core/NEON/kernels/NEScharr3x3Kernel.cpp deleted file mode 100644 index 58b8caa2b6..0000000000 --- a/src/core/NEON/kernels/NEScharr3x3Kernel.cpp +++ /dev/null @@ -1,262 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/NEON/kernels/NEScharr3x3Kernel.h" - -#include "arm_compute/core/Coordinates.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" - -#include -#include - -using namespace arm_compute; - -namespace -{ -const int16x8_t three = vdupq_n_s16(3); -const int16x8_t minus_three = vdupq_n_s16(-3); -const int16x8_t ten = vdupq_n_s16(10); -const int16x8_t minus_ten = vdupq_n_s16(-10); - -inline int16x8_t scharr_y(const int16x8x2_t &top, const int16x8x2_t &bottom) -{ - // Top left - int16x8_t out = vmulq_s16(top.val[0], minus_three); - // Top center - out = vmlaq_s16(out, vextq_s16(top.val[0], top.val[1], 1), minus_ten); - // Top right - out = vmlaq_s16(out, vextq_s16(top.val[0], top.val[1], 2), minus_three); - - // Bottom left - out = vmlaq_s16(out, bottom.val[0], three); - // Bottom center - out = vmlaq_s16(out, vextq_s16(bottom.val[0], bottom.val[1], 1), ten); - // Bottom right - out = vmlaq_s16(out, vextq_s16(bottom.val[0], bottom.val[1], 2), three); - - return out; -} - -inline int16x8_t scharr_x(const int16x8x2_t &top, const int16x8x2_t &middle, const int16x8x2_t &bottom) -{ - // Top left - int16x8_t out = vmulq_s16(top.val[0], minus_three); - // Top right - out = vmlaq_s16(out, vextq_s16(top.val[0], top.val[1], 2), three); - - // Middle left - out = vmlaq_s16(out, middle.val[0], minus_ten); - // Middle right - out = vmlaq_s16(out, vextq_s16(middle.val[0], middle.val[1], 2), ten); - - // Bottom left - out = vmlaq_s16(out, bottom.val[0], minus_three); - // Bottom right - out = vmlaq_s16(out, vextq_s16(bottom.val[0], bottom.val[1], 2), three); - - return out; -} -} // namespace - -NEScharr3x3Kernel::NEScharr3x3Kernel() - : _run_scharr_x(false), _run_scharr_y(false), _input(nullptr), _output_x(nullptr), _output_y(nullptr) -{ -} - -void NEScharr3x3Kernel::configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON((output_x == nullptr) && (output_y == nullptr)); - - _run_scharr_x = output_x != nullptr; - _run_scharr_y = output_y != nullptr; - - if(_run_scharr_x) - { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_x, 1, DataType::S16); - } - - if(_run_scharr_y) - { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_y, 1, DataType::S16); - } - - _input = input; - _output_x = output_x; - _output_y = output_y; - - // Configure kernel window - constexpr unsigned int num_elems_processed_per_iteration = 8; - constexpr unsigned int num_elems_read_per_iteration = 16; - constexpr unsigned int num_elems_written_per_iteration = 8; - constexpr unsigned int num_rows_read_per_iteration = 3; - - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size()); - AccessWindowHorizontal output_x_access(output_x == nullptr ? nullptr : output_x->info(), 0, num_elems_written_per_iteration); - AccessWindowHorizontal output_y_access(output_y == nullptr ? nullptr : output_y->info(), 0, num_elems_written_per_iteration); - - update_window_and_padding(win, - AccessWindowRectangle(input->info(), -border_size().left, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration), - output_x_access, - output_y_access); - - output_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - output_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - - INEKernel::configure(win); -} - -BorderSize NEScharr3x3Kernel::border_size() const -{ - return BorderSize(1); -} - -void NEScharr3x3Kernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - - const unsigned char *const input_top_ptr = _input->ptr_to_element(Coordinates(-1, -1)); - const unsigned char *const input_mid_ptr = _input->ptr_to_element(Coordinates(-1, 0)); - const unsigned char *const input_bot_ptr = _input->ptr_to_element(Coordinates(-1, +1)); - - Iterator input(_input, window); - Iterator output_y; - Iterator output_x; - - if(_run_scharr_y) - { - output_y = Iterator(_output_y, window); - } - - if(_run_scharr_x) - { - output_x = Iterator(_output_x, window); - } - - if(_run_scharr_x && _run_scharr_y) - { - execute_window_loop(window, [&](const Coordinates &) - { - - const uint8x16_t top_data = vld1q_u8(input_top_ptr + input.offset()); - const uint8x16_t mid_data = vld1q_u8(input_mid_ptr + input.offset()); - const uint8x16_t bot_data = vld1q_u8(input_bot_ptr + input.offset()); - - const int16x8x2_t top_s16 = - { - { - vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(top_data))), - vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(top_data))) - } - }; - const int16x8x2_t mid_s16 = - { - { - vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(mid_data))), - vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(mid_data))) - } - }; - const int16x8x2_t bot_s16 = - { - { - vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(bot_data))), - vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(bot_data))) - } - }; - - vst1q_s16(reinterpret_cast(output_x.ptr()), scharr_x(top_s16, mid_s16, bot_s16)); - vst1q_s16(reinterpret_cast(output_y.ptr()), scharr_y(top_s16, bot_s16)); - }, - input, output_x, output_y); - } - else if(_run_scharr_x) - { - execute_window_loop(window, [&](const Coordinates &) - { - - const uint8x16_t top_data = vld1q_u8(input_top_ptr + input.offset()); - const uint8x16_t mid_data = vld1q_u8(input_mid_ptr + input.offset()); - const uint8x16_t bot_data = vld1q_u8(input_bot_ptr + input.offset()); - - const int16x8x2_t top_s16 = - { - { - vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(top_data))), - vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(top_data))) - } - }; - const int16x8x2_t mid_s16 = - { - { - vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(mid_data))), - vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(mid_data))) - } - }; - const int16x8x2_t bot_s16 = - { - { - vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(bot_data))), - vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(bot_data))) - } - }; - - vst1q_s16(reinterpret_cast(output_x.ptr()), scharr_x(top_s16, mid_s16, bot_s16)); - }, - input, output_x); - } - else if(_run_scharr_y) - { - execute_window_loop(window, [&](const Coordinates &) - { - - const uint8x16_t top_data = vld1q_u8(input_top_ptr + input.offset()); - const uint8x16_t bot_data = vld1q_u8(input_bot_ptr + input.offset()); - - const int16x8x2_t top_s16 = - { - { - vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(top_data))), - vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(top_data))) - } - }; - const int16x8x2_t bot_s16 = - { - { - vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(bot_data))), - vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(bot_data))) - } - }; - - vst1q_s16(reinterpret_cast(output_y.ptr()), scharr_y(top_s16, bot_s16)); - }, - input, output_y); - } -} diff --git a/src/core/NEON/kernels/NEScharr3x3Kernel.h b/src/core/NEON/kernels/NEScharr3x3Kernel.h deleted file mode 100644 index 920410ebb3..0000000000 --- a/src/core/NEON/kernels/NEScharr3x3Kernel.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NESCHARR3x3KERNEL_H -#define ARM_COMPUTE_NESCHARR3x3KERNEL_H - -#include "src/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to run a 3x3 Scharr filter on a tensor. - * -* @f[ -* \mathbf{G}_x=\begin{vmatrix} -* -3 & 0 & +3\\ -* -10& 0 & +10\\ -* -3 & 0 & +3 -* \end{vmatrix} -* @f] -*/ -class NEScharr3x3Kernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEScharr3x3Kernel"; - } - /** Default constructor */ - NEScharr3x3Kernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEScharr3x3Kernel(const NEScharr3x3Kernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEScharr3x3Kernel &operator=(const NEScharr3x3Kernel &) = delete; - /** Allow instances of this class to be moved */ - NEScharr3x3Kernel(NEScharr3x3Kernel &&) = default; - /** Allow instances of this class to be moved */ - NEScharr3x3Kernel &operator=(NEScharr3x3Kernel &&) = default; - /** Default destructor */ - ~NEScharr3x3Kernel() = default; - - /** Initialise the kernel's source, destination and border. - * - * @note At least one of output_x or output_y must be set. - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S16. - * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - bool _run_scharr_x; /**< Do we need to run Scharr X ? */ - bool _run_scharr_y; /**< Do we need to run Scharr Y ? */ - const ITensor *_input; /**< Input tensor */ - ITensor *_output_x; /**< Output tensor for scharr X */ - ITensor *_output_y; /**< Output tensor for scharr Y */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NESCHARR3x3KERNEL_H */ diff --git a/src/core/NEON/kernels/NESobel3x3Kernel.cpp b/src/core/NEON/kernels/NESobel3x3Kernel.cpp deleted file mode 100644 index ecf6b59c29..0000000000 --- a/src/core/NEON/kernels/NESobel3x3Kernel.cpp +++ /dev/null @@ -1,272 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/NEON/kernels/NESobel3x3Kernel.h" - -#include "arm_compute/core/Coordinates.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" - -#include -#include - -using namespace arm_compute; - -NESobel3x3Kernel::NESobel3x3Kernel() - : _run_sobel_x(false), _run_sobel_y(false), _input(nullptr), _output_x(nullptr), _output_y(nullptr) -{ -} - -BorderSize NESobel3x3Kernel::border_size() const -{ - return BorderSize{ 1 }; -} - -void NESobel3x3Kernel::configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON((output_x == nullptr) && (output_y == nullptr)); - - _run_sobel_x = output_x != nullptr; - _run_sobel_y = output_y != nullptr; - - if(_run_sobel_x) - { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_x, 1, DataType::S16); - } - - if(_run_sobel_y) - { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_y, 1, DataType::S16); - } - - _input = input; - _output_x = output_x; - _output_y = output_y; - - // Configure kernel window - constexpr unsigned int num_elems_processed_per_iteration = 8; - constexpr unsigned int num_elems_read_per_iteration = 16; - constexpr unsigned int num_elems_written_per_iteration = 8; - constexpr unsigned int num_rows_read_per_iteration = 3; - - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size()); - AccessWindowHorizontal output_x_access(output_x == nullptr ? nullptr : output_x->info(), 0, num_elems_written_per_iteration); - AccessWindowHorizontal output_y_access(output_y == nullptr ? nullptr : output_y->info(), 0, num_elems_written_per_iteration); - - update_window_and_padding(win, - AccessWindowRectangle(input->info(), -border_size().left, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration), - output_x_access, - output_y_access); - - output_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - output_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - - INEKernel::configure(win); -} - -void NESobel3x3Kernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - - const unsigned char *const input_top_ptr = _input->ptr_to_element(Coordinates(-1, -1)); - const unsigned char *const input_mid_ptr = _input->ptr_to_element(Coordinates(-1, 0)); - const unsigned char *const input_bot_ptr = _input->ptr_to_element(Coordinates(-1, 1)); - - Iterator input(_input, window); - Iterator output_y; - Iterator output_x; - - if(_run_sobel_y) - { - output_y = Iterator(_output_y, window); - } - - if(_run_sobel_x) - { - output_x = Iterator(_output_x, window); - } - - static const int16x8_t two = vdupq_n_s16(2); - static const int16x8_t minustwo = vdupq_n_s16(-2); - - if(_run_sobel_y && _run_sobel_x) - { - execute_window_loop(window, [&](const Coordinates &) - { - const uint8x16_t top_data = vld1q_u8(input_top_ptr + input.offset()); - const uint8x16_t mid_data = vld1q_u8(input_mid_ptr + input.offset()); - const uint8x16_t bot_data = vld1q_u8(input_bot_ptr + input.offset()); - - const int16x8x2_t top_s16 = - { - { - vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(top_data))), - vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(top_data))) - } - }; - const int16x8x2_t mid_s16 = - { - { - vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(mid_data))), - vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(mid_data))) - } - }; - const int16x8x2_t bot_s16 = - { - { - vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(bot_data))), - vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(bot_data))) - } - }; - - //SOBEL Y - //top left - int16x8_t out_y = vnegq_s16(top_s16.val[0]); - //top mid - out_y = vmlaq_s16(out_y, vextq_s16(top_s16.val[0], top_s16.val[1], 1), minustwo); - //top right - out_y = vsubq_s16(out_y, vextq_s16(top_s16.val[0], top_s16.val[1], 2)); - //bot left - out_y = vaddq_s16(out_y, bot_s16.val[0]); - //bot mid - out_y = vmlaq_s16(out_y, vextq_s16(bot_s16.val[0], bot_s16.val[1], 1), two); - //bot right - out_y = vaddq_s16(out_y, vextq_s16(bot_s16.val[0], bot_s16.val[1], 2)); - - vst1q_s16(reinterpret_cast(output_y.ptr()), out_y); - - //SOBEL X - //top left - int16x8_t out_x = vnegq_s16(top_s16.val[0]); - //top right - out_x = vaddq_s16(out_x, vextq_s16(top_s16.val[0], top_s16.val[1], 2)); - //mid left - out_x = vmlaq_s16(out_x, mid_s16.val[0], minustwo); - //mid right - out_x = vmlaq_s16(out_x, vextq_s16(mid_s16.val[0], mid_s16.val[1], 2), two); - //bot left - out_x = vsubq_s16(out_x, bot_s16.val[0]); - //bot right - out_x = vaddq_s16(out_x, vextq_s16(bot_s16.val[0], bot_s16.val[1], 2)); - - vst1q_s16(reinterpret_cast(output_x.ptr()), out_x); - }, - input, output_x, output_y); - } - else if(_run_sobel_x) - { - execute_window_loop(window, [&](const Coordinates &) - { - const uint8x16_t top_data = vld1q_u8(input_top_ptr + input.offset()); - const uint8x16_t mid_data = vld1q_u8(input_mid_ptr + input.offset()); - const uint8x16_t bot_data = vld1q_u8(input_bot_ptr + input.offset()); - - const int16x8x2_t top_s16 = - { - { - vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(top_data))), - vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(top_data))) - } - }; - const int16x8x2_t mid_s16 = - { - { - vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(mid_data))), - vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(mid_data))) - } - }; - const int16x8x2_t bot_s16 = - { - { - vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(bot_data))), - vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(bot_data))) - } - }; - - //SOBEL X - //top left - int16x8_t out = vnegq_s16(top_s16.val[0]); - //top right - out = vaddq_s16(out, vextq_s16(top_s16.val[0], top_s16.val[1], 2)); - //mid left - out = vmlaq_s16(out, mid_s16.val[0], minustwo); - //mid right - out = vmlaq_s16(out, vextq_s16(mid_s16.val[0], mid_s16.val[1], 2), two); - //bot left - out = vsubq_s16(out, bot_s16.val[0]); - //bot right - out = vaddq_s16(out, vextq_s16(bot_s16.val[0], bot_s16.val[1], 2)); - - vst1q_s16(reinterpret_cast(output_x.ptr()), out); - }, - input, output_x); - } - else if(_run_sobel_y) - { - execute_window_loop(window, [&](const Coordinates &) - { - const uint8x16_t top_data = vld1q_u8(input_top_ptr + input.offset()); - const uint8x16_t bot_data = vld1q_u8(input_bot_ptr + input.offset()); - - const int16x8x2_t top_s16 = - { - { - vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(top_data))), - vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(top_data))) - } - }; - const int16x8x2_t bot_s16 = - { - { - vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(bot_data))), - vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(bot_data))) - } - }; - - //SOBEL Y - //top left - int16x8_t out = vnegq_s16(top_s16.val[0]); - //top mid - out = vmlaq_s16(out, vextq_s16(top_s16.val[0], top_s16.val[1], 1), minustwo); - //top right - out = vsubq_s16(out, vextq_s16(top_s16.val[0], top_s16.val[1], 2)); - //bot left - out = vaddq_s16(out, bot_s16.val[0]); - //bot mid - out = vmlaq_s16(out, vextq_s16(bot_s16.val[0], bot_s16.val[1], 1), two); - //bot right - out = vaddq_s16(out, vextq_s16(bot_s16.val[0], bot_s16.val[1], 2)); - - vst1q_s16(reinterpret_cast(output_y.ptr()), out); - }, - input, output_y); - } -} diff --git a/src/core/NEON/kernels/NESobel3x3Kernel.h b/src/core/NEON/kernels/NESobel3x3Kernel.h deleted file mode 100644 index 2c3eaf5eb7..0000000000 --- a/src/core/NEON/kernels/NESobel3x3Kernel.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NESOBEL3x3KERNEL_H -#define ARM_COMPUTE_NESOBEL3x3KERNEL_H - -#include "src/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to run a 3x3 Sobel X filter on a tensor. - * - * @f[ - * \mathbf{G}_x=\begin{vmatrix} - * -1 & 0 & +1\\ - * -2 & 0 & +2\\ - * -1 & 0 & +1 - * \end{vmatrix} - * @f] -*/ -class NESobel3x3Kernel : public INEKernel -{ -public: - const char *name() const override - { - return "NESobel3x3Kernel"; - } - /** Default constructor */ - NESobel3x3Kernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESobel3x3Kernel(const NESobel3x3Kernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESobel3x3Kernel &operator=(const NESobel3x3Kernel &) = delete; - /** Allow instances of this class to be moved */ - NESobel3x3Kernel(NESobel3x3Kernel &&) = default; - /** Allow instances of this class to be moved */ - NESobel3x3Kernel &operator=(NESobel3x3Kernel &&) = default; - /** Default destructor */ - ~NESobel3x3Kernel() = default; - - /** Initialise the kernel's source, destination and border mode. - * - * @note At least one of output_x or output_y must be set. - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S16. - * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - bool _run_sobel_x; /**< Do we need to run Sobel X ? */ - bool _run_sobel_y; /**< Do we need to run Sobel Y ? */ - const ITensor *_input; /**< Input tensor */ - ITensor *_output_x; /**< Output tensor for sobel X */ - ITensor *_output_y; /**< Output tensor for sobel Y */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NESOBEL3x3KERNEL_H */ diff --git a/src/core/NEON/kernels/NESobel5x5Kernel.cpp b/src/core/NEON/kernels/NESobel5x5Kernel.cpp deleted file mode 100644 index 5a66b1f364..0000000000 --- a/src/core/NEON/kernels/NESobel5x5Kernel.cpp +++ /dev/null @@ -1,406 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/NEON/kernels/NESobel5x5Kernel.h" - -#include "arm_compute/core/Coordinates.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" - -#include -#include -#include - -using namespace arm_compute; - -NESobel5x5HorKernel::NESobel5x5HorKernel() - : _input(nullptr), _output_x(nullptr), _output_y(nullptr), _run_sobel_x(false), _run_sobel_y(false), _border_size(0) -{ -} - -BorderSize NESobel5x5HorKernel::border_size() const -{ - return _border_size; -} - -void NESobel5x5HorKernel::configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON((output_x == nullptr) && (output_y == nullptr)); - - _run_sobel_x = output_x != nullptr; - _run_sobel_y = output_y != nullptr; - - if(_run_sobel_x) - { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_x, 1, DataType::S16); - } - - if(_run_sobel_y) - { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_y, 1, DataType::S16); - } - - _input = input; - _output_x = output_x; - _output_y = output_y; - _border_size = BorderSize(border_undefined ? 0 : 2, 2); - - // Configure kernel window - constexpr unsigned int num_elems_processed_per_iteration = 8; - constexpr unsigned int num_elems_read_per_iteration = 16; - constexpr unsigned int num_elems_written_per_iteration = 8; - - Window win = calculate_max_window_horizontal(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size()); - AccessWindowHorizontal output_x_access(output_x == nullptr ? nullptr : output_x->info(), 0, num_elems_written_per_iteration); - AccessWindowHorizontal output_y_access(output_y == nullptr ? nullptr : output_y->info(), 0, num_elems_written_per_iteration); - - update_window_and_padding(win, - AccessWindowHorizontal(input->info(), -border_size().left, num_elems_read_per_iteration), - output_x_access, - output_y_access); - - output_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - output_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - - INEKernel::configure(win); -} - -void NESobel5x5HorKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - - Window win_in(window); - win_in.shift(Window::DimX, -2); - - Iterator input(_input, win_in); - Iterator output_x; - Iterator output_y; - - if(_run_sobel_x) - { - output_x = Iterator(_output_x, window); - } - - if(_run_sobel_y) - { - output_y = Iterator(_output_y, window); - } - - if(_run_sobel_y && _run_sobel_x) - { - static const int16x8_t six = vdupq_n_s16(6); - static const int16x8_t four = vdupq_n_s16(4); - static const int16x8_t two = vdupq_n_s16(2); - static const int16x8_t minustwo = vdupq_n_s16(-2); - - execute_window_loop(window, [&](const Coordinates &) - { - const uint8x16_t data = vld1q_u8(input.ptr()); - - const int16x8x2_t data_s16 = - { - { - vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(data))), - vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(data))) - } - }; - - int16x8_t out_y = data_s16.val[0]; - out_y = vmlaq_s16(out_y, vextq_s16(data_s16.val[0], data_s16.val[1], 1), four); - out_y = vmlaq_s16(out_y, vextq_s16(data_s16.val[0], data_s16.val[1], 2), six); - out_y = vmlaq_s16(out_y, vextq_s16(data_s16.val[0], data_s16.val[1], 3), four); - out_y = vaddq_s16(out_y, vextq_s16(data_s16.val[0], data_s16.val[1], 4)); - - vst1q_s16(reinterpret_cast(output_y.ptr()), out_y); - - int16x8_t out_x = vnegq_s16(data_s16.val[0]); - out_x = vmlaq_s16(out_x, vextq_s16(data_s16.val[0], data_s16.val[1], 1), minustwo); - out_x = vmlaq_s16(out_x, vextq_s16(data_s16.val[0], data_s16.val[1], 3), two); - out_x = vaddq_s16(out_x, vextq_s16(data_s16.val[0], data_s16.val[1], 4)); - - vst1q_s16(reinterpret_cast(output_x.ptr()), out_x); - }, - input, output_x, output_y); - } - else if(_run_sobel_x) - { - static const int16x8_t two = vdupq_n_s16(2); - static const int16x8_t minustwo = vdupq_n_s16(-2); - - execute_window_loop(window, [&](const Coordinates &) - { - const uint8x16_t data = vld1q_u8(input.ptr()); - - const int16x8x2_t data_s16 = - { - { - vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(data))), - vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(data))) - } - }; - - int16x8_t out = vnegq_s16(data_s16.val[0]); - out = vmlaq_s16(out, vextq_s16(data_s16.val[0], data_s16.val[1], 1), minustwo); - out = vmlaq_s16(out, vextq_s16(data_s16.val[0], data_s16.val[1], 3), two); - out = vaddq_s16(out, vextq_s16(data_s16.val[0], data_s16.val[1], 4)); - - vst1q_s16(reinterpret_cast(output_x.ptr()), out); - }, - input, output_x); - } - else if(_run_sobel_y) - { - static const int16x8_t six = vdupq_n_s16(6); - static const int16x8_t four = vdupq_n_s16(4); - - execute_window_loop(window, [&](const Coordinates &) - { - const uint8x16_t data = vld1q_u8(input.ptr()); - - const int16x8x2_t data_s16 = - { - { - vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(data))), - vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(data))) - } - }; - - int16x8_t out = data_s16.val[0]; - out = vmlaq_s16(out, vextq_s16(data_s16.val[0], data_s16.val[1], 1), four); - out = vmlaq_s16(out, vextq_s16(data_s16.val[0], data_s16.val[1], 2), six); - out = vmlaq_s16(out, vextq_s16(data_s16.val[0], data_s16.val[1], 3), four); - out = vaddq_s16(out, vextq_s16(data_s16.val[0], data_s16.val[1], 4)); - - vst1q_s16(reinterpret_cast(output_y.ptr()), out); - }, - input, output_y); - } -} - -NESobel5x5VertKernel::NESobel5x5VertKernel() - : _input_x(nullptr), _input_y(nullptr), _output_x(nullptr), _output_y(nullptr), _run_sobel_x(false), _run_sobel_y(false) -{ -} - -BorderSize NESobel5x5VertKernel::border_size() const -{ - return BorderSize{ 2, 0 }; -} - -void NESobel5x5VertKernel::configure(ITensor *input_x, ITensor *input_y, ITensor *output_x, ITensor *output_y, bool border_undefined) -{ - ARM_COMPUTE_ERROR_ON((output_x == nullptr) && (output_y == nullptr)); - - _run_sobel_x = output_x != nullptr; - _run_sobel_y = output_y != nullptr; - - if(_run_sobel_x) - { - ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(input_x, Format::S16); - ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(output_x, Format::S16); - } - - if(_run_sobel_y) - { - ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(input_y, Format::S16); - ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(output_y, Format::S16); - } - - _input_x = input_x; - _input_y = input_y; - _output_x = output_x; - _output_y = output_y; - - const ITensor *const input = _run_sobel_x ? input_x : input_y; - - // Configure kernel window - constexpr unsigned int num_elems_processed_per_iteration = 16; - constexpr unsigned int num_elems_read_per_iteration = 16; - constexpr unsigned int num_elems_written_per_iteration = 16; - constexpr unsigned int num_rows_read_per_iteration = 5; - - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size()); - AccessWindowHorizontal output_x_access(output_x == nullptr ? nullptr : output_x->info(), 0, num_elems_written_per_iteration); - AccessWindowHorizontal output_y_access(output_y == nullptr ? nullptr : output_y->info(), 0, num_elems_written_per_iteration); - - update_window_and_padding(win, - AccessWindowRectangle(input_x == nullptr ? nullptr : input_x->info(), 0, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration), - AccessWindowRectangle(input_y == nullptr ? nullptr : input_y->info(), 0, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration), - output_x_access, - output_y_access); - - output_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - output_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - - INEKernel::configure(win); -} - -void NESobel5x5VertKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - - Iterator input_x; - Iterator input_y; - Iterator output_x; - Iterator output_y; - - const int16_t *input_x_low2_ptr = nullptr; - const int16_t *input_x_low_ptr = nullptr; - const int16_t *input_x_mid_ptr = nullptr; - const int16_t *input_x_top_ptr = nullptr; - const int16_t *input_x_top2_ptr = nullptr; - - const int16_t *input_y_low2_ptr = nullptr; - const int16_t *input_y_low_ptr = nullptr; - const int16_t *input_y_top_ptr = nullptr; - const int16_t *input_y_top2_ptr = nullptr; - - if(_run_sobel_x) - { - input_x = Iterator(_input_x, window); - output_x = Iterator(_output_x, window); - input_x_top2_ptr = reinterpret_cast(_input_x->ptr_to_element(Coordinates(0, -2))); - input_x_top_ptr = reinterpret_cast(_input_x->ptr_to_element(Coordinates(0, -1))); - input_x_mid_ptr = reinterpret_cast(_input_x->ptr_to_element(Coordinates(0, 0))); - input_x_low_ptr = reinterpret_cast(_input_x->ptr_to_element(Coordinates(0, 1))); - input_x_low2_ptr = reinterpret_cast(_input_x->ptr_to_element(Coordinates(0, 2))); - } - - if(_run_sobel_y) - { - input_y = Iterator(_input_y, window); - output_y = Iterator(_output_y, window); - input_y_top2_ptr = reinterpret_cast(_input_y->ptr_to_element(Coordinates(0, -2))); - input_y_top_ptr = reinterpret_cast(_input_y->ptr_to_element(Coordinates(0, -1))); - input_y_low_ptr = reinterpret_cast(_input_y->ptr_to_element(Coordinates(0, 1))); - input_y_low2_ptr = reinterpret_cast(_input_y->ptr_to_element(Coordinates(0, 2))); - } - - static const int16x8_t six = vdupq_n_s16(6); - static const int16x8_t four = vdupq_n_s16(4); - static const int16x8_t two = vdupq_n_s16(2); - static const int16x8_t minustwo = vdupq_n_s16(-2); - - if(_run_sobel_x) - { - execute_window_loop(window, [&](const Coordinates &) - { - // Convert offset from uint8_t* to uint16_t* - const size_t input_offset_high_s16 = input_x.offset() / 2; - const size_t input_offset_low_s16 = input_offset_high_s16 + 8; - - //HIGH DATA - //top2 - int16x8_t data_high = vld1q_s16(input_x_top2_ptr + input_offset_high_s16); - int16x8_t out_high = data_high; - //top - data_high = vld1q_s16(input_x_top_ptr + input_offset_high_s16); - out_high = vmlaq_s16(out_high, data_high, four); - //mid - data_high = vld1q_s16(input_x_mid_ptr + input_offset_high_s16); - out_high = vmlaq_s16(out_high, data_high, six); - //low - data_high = vld1q_s16(input_x_low_ptr + input_offset_high_s16); - out_high = vmlaq_s16(out_high, data_high, four); - //low2 - data_high = vld1q_s16(input_x_low2_ptr + input_offset_high_s16); - out_high = vaddq_s16(out_high, data_high); - - vst1q_s16((reinterpret_cast(output_x.ptr())), out_high); - - //LOW DATA - //top2 - int16x8_t data_low = vld1q_s16(input_x_top2_ptr + input_offset_low_s16); - int16x8_t out_low = data_low; - //top - data_low = vld1q_s16(input_x_top_ptr + input_offset_low_s16); - out_low = vmlaq_s16(out_low, data_low, four); - //mid - data_low = vld1q_s16(input_x_mid_ptr + input_offset_low_s16); - out_low = vmlaq_s16(out_low, data_low, six); - //low - data_low = vld1q_s16(input_x_low_ptr + input_offset_low_s16); - out_low = vmlaq_s16(out_low, data_low, four); - //low2 - data_low = vld1q_s16(input_x_low2_ptr + input_offset_low_s16); - out_low = vaddq_s16(out_low, data_low); - - vst1q_s16((reinterpret_cast(output_x.ptr())) + 8, out_low); - }, - input_x, output_x); - } - - if(_run_sobel_y) - { - execute_window_loop(window, [&](const Coordinates &) - { - // Convert offset from uint8_t* to uint16_t* - const size_t input_offset_high_s16 = input_y.offset() / 2; - const size_t input_offset_low_s16 = input_offset_high_s16 + 8; - - //HIGH DATA - //top2 - int16x8_t data_high = vld1q_s16(input_y_top2_ptr + input_offset_high_s16); - int16x8_t out_high = vnegq_s16(data_high); - //top - data_high = vld1q_s16(input_y_top_ptr + input_offset_high_s16); - out_high = vmlaq_s16(out_high, data_high, minustwo); - //low - data_high = vld1q_s16(input_y_low_ptr + input_offset_high_s16); - out_high = vmlaq_s16(out_high, data_high, two); - //low2 - data_high = vld1q_s16(input_y_low2_ptr + input_offset_high_s16); - out_high = vaddq_s16(out_high, data_high); - - vst1q_s16((reinterpret_cast(output_y.ptr())), out_high); - - //LOW DATA - //top2 - int16x8_t data_low = vld1q_s16(input_y_top2_ptr + input_offset_low_s16); - int16x8_t out_low = vnegq_s16(data_low); - //top - data_low = vld1q_s16(input_y_top_ptr + input_offset_low_s16); - out_low = vmlaq_s16(out_low, data_low, minustwo); - //low - data_low = vld1q_s16(input_y_low_ptr + input_offset_low_s16); - out_low = vmlaq_s16(out_low, data_low, two); - //low2 - data_low = vld1q_s16(input_y_low2_ptr + input_offset_low_s16); - out_low = vaddq_s16(out_low, data_low); - - vst1q_s16((reinterpret_cast(output_y.ptr())) + 8, out_low); - }, - input_y, output_y); - } -} diff --git a/src/core/NEON/kernels/NESobel5x5Kernel.h b/src/core/NEON/kernels/NESobel5x5Kernel.h deleted file mode 100644 index bd5eb29296..0000000000 --- a/src/core/NEON/kernels/NESobel5x5Kernel.h +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NESOBEL5x5KERNEL_H -#define ARM_COMPUTE_NESOBEL5x5KERNEL_H - -#include "src/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to run the horizontal pass of 5x5 Sobel filter on a tensor. - * - */ -class NESobel5x5HorKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NESobel5x5HorKernel"; - } - /** Default constructor */ - NESobel5x5HorKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESobel5x5HorKernel(const NESobel5x5HorKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESobel5x5HorKernel &operator=(const NESobel5x5HorKernel &) = delete; - /** Allow instances of this class to be moved */ - NESobel5x5HorKernel(NESobel5x5HorKernel &&) = default; - /** Allow instances of this class to be moved */ - NESobel5x5HorKernel &operator=(NESobel5x5HorKernel &&) = default; - /** Default destructor */ - ~NESobel5x5HorKernel() = default; - - /** Initialise the kernel's source, destination and border mode. - * - * @note At least one of output_x or output_y must be set - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S16. - * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - const ITensor *_input; /**< Input tensor */ - ITensor *_output_x; /**< X output of horizontal pass */ - ITensor *_output_y; /**< Y output of horizontal pass */ - bool _run_sobel_x; /**< Do we need to run Sobel X? */ - bool _run_sobel_y; /**< Do we need to run Sobel Y? */ - BorderSize _border_size; /**< Border size */ -}; - -/** Interface for the kernel to run the vertical pass of 5x5 Sobel Y filter on a tensor. - * -*/ -class NESobel5x5VertKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NESobel5x5VertKernel"; - } - /** Default constructor */ - NESobel5x5VertKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESobel5x5VertKernel(const NESobel5x5VertKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESobel5x5VertKernel &operator=(const NESobel5x5VertKernel &) = delete; - /** Allow instances of this class to be moved */ - NESobel5x5VertKernel(NESobel5x5VertKernel &&) = default; - /** Allow instances of this class to be moved */ - NESobel5x5VertKernel &operator=(NESobel5x5VertKernel &&) = default; - /** Default destructor */ - ~NESobel5x5VertKernel() = default; - - /** Initialise the kernel's source, destination and border mode. - * - * @param[in] input_x Input for X (X output of hor pass). Data type supported: S16. - * @param[in] input_y Input for Y (Y output of hor pass). Data type supported: S16. - * @param[out] output_x Destination tensor for the X gradient. Data type supported: S16. - * @param[out] output_y Destination tensor for the Y gradient. Data type supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(ITensor *input_x, ITensor *input_y, ITensor *output_x, ITensor *output_y, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - ITensor *_input_x; /**< X input (X output of the hor pass) */ - ITensor *_input_y; /**< Y input (Y output of the hor pass) */ - ITensor *_output_x; /**< X output of sobel */ - ITensor *_output_y; /**< Y output of sobel */ - bool _run_sobel_x; /**< Do we need to run sobel X? */ - bool _run_sobel_y; /**< Do we need to run sobel Y? */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NESOBEL5x5KERNEL_H */ diff --git a/src/core/NEON/kernels/NESobel7x7Kernel.cpp b/src/core/NEON/kernels/NESobel7x7Kernel.cpp deleted file mode 100644 index 835b333a10..0000000000 --- a/src/core/NEON/kernels/NESobel7x7Kernel.cpp +++ /dev/null @@ -1,524 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/NEON/kernels/NESobel7x7Kernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" - -#include -#include - -using namespace arm_compute; - -namespace arm_compute -{ -class Coordinates; -} // namespace arm_compute - -namespace -{ -const int32x4_t minusfour = vdupq_n_s32(-4); -const int32x4_t minusfive = vdupq_n_s32(-5); -const int32x4_t four = vdupq_n_s32(4); -const int32x4_t five = vdupq_n_s32(5); -const int32x4_t six = vdupq_n_s32(6); -const int32x4_t fifteen = vdupq_n_s32(15); -const int32x4_t twenty = vdupq_n_s32(20); - -inline int32x4x2_t compute_hor_sobel_x(const int32x4x4_t &data) -{ - int32x4x2_t out = - { - { - vnegq_s32(data.val[0]), - vnegq_s32(data.val[1]) - } - }; - - out.val[0] = vmlaq_s32(out.val[0], - vextq_s32(data.val[0], data.val[1], 1), minusfour); - - out.val[0] = vmlaq_s32(out.val[0], - vextq_s32(data.val[0], data.val[1], 2), minusfive); - - out.val[0] = vmlaq_s32(out.val[0], data.val[1], five); - - out.val[0] = vmlaq_s32(out.val[0], - vextq_s32(data.val[1], data.val[2], 1), four); - - out.val[0] = vaddq_s32(out.val[0], - vextq_s32(data.val[1], data.val[2], 2)); - - out.val[1] = vmlaq_s32(out.val[1], - vextq_s32(data.val[1], data.val[2], 1), minusfour); - - out.val[1] = vmlaq_s32(out.val[1], - vextq_s32(data.val[1], data.val[2], 2), minusfive); - - out.val[1] = vmlaq_s32(out.val[1], data.val[2], five); - - out.val[1] = vmlaq_s32(out.val[1], - vextq_s32(data.val[2], data.val[3], 1), four); - - out.val[1] = vaddq_s32(out.val[1], - vextq_s32(data.val[2], data.val[3], 2)); - - return out; -} - -inline int32x4x2_t compute_hor_sobel_y(const int32x4x4_t &data) -{ - int32x4x2_t out = - { - { - data.val[0], - data.val[1] - } - }; - - out.val[0] = vmlaq_s32(out.val[0], - vextq_s32(data.val[0], data.val[1], 1), six); - - out.val[0] = vmlaq_s32(out.val[0], - vextq_s32(data.val[0], data.val[1], 2), fifteen); - - out.val[0] = vmlaq_s32(out.val[0], - vextq_s32(data.val[0], data.val[1], 3), twenty); - - out.val[0] = vmlaq_s32(out.val[0], data.val[1], fifteen); - - out.val[0] = vmlaq_s32(out.val[0], - vextq_s32(data.val[1], data.val[2], 1), six); - - out.val[0] = vaddq_s32(out.val[0], - vextq_s32(data.val[1], data.val[2], 2)); - - out.val[1] = vmlaq_s32(out.val[1], - vextq_s32(data.val[1], data.val[2], 1), six); - - out.val[1] = vmlaq_s32(out.val[1], - vextq_s32(data.val[1], data.val[2], 2), fifteen); - - out.val[1] = vmlaq_s32(out.val[1], - vextq_s32(data.val[1], data.val[2], 3), twenty); - - out.val[1] = vmlaq_s32(out.val[1], data.val[2], fifteen); - - out.val[1] = vmlaq_s32(out.val[1], - vextq_s32(data.val[2], data.val[3], 1), six); - - out.val[1] = vaddq_s32(out.val[1], - vextq_s32(data.val[2], data.val[3], 2)); - - return out; -} -} // namespace - -NESobel7x7HorKernel::NESobel7x7HorKernel() - : _input(nullptr), _output_x(nullptr), _output_y(nullptr), _run_sobel_x(false), _run_sobel_y(false), _border_size(0) -{ -} - -BorderSize NESobel7x7HorKernel::border_size() const -{ - return _border_size; -} - -void NESobel7x7HorKernel::configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined) -{ - ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(input, Format::U8); - ARM_COMPUTE_ERROR_ON((output_x == nullptr) && (output_y == nullptr)); - - _run_sobel_x = output_x != nullptr; - _run_sobel_y = output_y != nullptr; - - if(_run_sobel_x) - { - ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(output_x, Format::S32); - } - - if(_run_sobel_y) - { - ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(output_y, Format::S32); - } - - _input = input; - _output_x = output_x; - _output_y = output_y; - _border_size = BorderSize(border_undefined ? 0 : 3, 3); - - // Configure kernel window - constexpr unsigned int num_elems_processed_per_iteration = 8; - constexpr unsigned int num_elems_read_per_iteration = 16; - constexpr unsigned int num_elems_written_per_iteration = 8; - - Window win = calculate_max_window_horizontal(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size()); - AccessWindowHorizontal output_x_access(output_x == nullptr ? nullptr : output_x->info(), 0, num_elems_written_per_iteration); - AccessWindowHorizontal output_y_access(output_y == nullptr ? nullptr : output_y->info(), 0, num_elems_written_per_iteration); - - update_window_and_padding(win, - AccessWindowHorizontal(input->info(), -border_size().left, num_elems_read_per_iteration), - output_x_access, - output_y_access); - - output_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - output_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - - INEKernel::configure(win); -} - -void NESobel7x7HorKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - - Iterator input(_input, window); - Iterator output_x; - Iterator output_y; - - if(_run_sobel_x) - { - output_x = Iterator(_output_x, window); - } - - if(_run_sobel_y) - { - output_y = Iterator(_output_y, window); - } - - if(_run_sobel_y && _run_sobel_x) - { - execute_window_loop(window, [&](const Coordinates &) - { - const uint8x16_t data = vld1q_u8(input.ptr() - 3); - - const uint16x8_t tmp_low_u16 = vmovl_u8(vget_low_u8(data)); - const uint16x8_t tmp_high_u16 = vmovl_u8(vget_high_u8(data)); - - const int32x4x4_t data_s32 = - { - { - vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(tmp_low_u16))), - vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(tmp_low_u16))), - vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(tmp_high_u16))), - vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(tmp_high_u16))) - } - }; - - const int32x4x2_t out_y = compute_hor_sobel_y(data_s32); - vst1q_s32(reinterpret_cast(output_y.ptr()), out_y.val[0]); - vst1q_s32(reinterpret_cast(output_y.ptr()) + 4, out_y.val[1]); - - const int32x4x2_t out_x = compute_hor_sobel_x(data_s32); - vst1q_s32(reinterpret_cast(output_x.ptr()), out_x.val[0]); - vst1q_s32(reinterpret_cast(output_x.ptr()) + 4, out_x.val[1]); - }, - input, output_x, output_y); - } - else if(_run_sobel_x) - { - execute_window_loop(window, [&](const Coordinates &) - { - const uint8x16_t data = vld1q_u8(input.ptr() - 3); - - const uint16x8_t tmp_low_u16 = vmovl_u8(vget_low_u8(data)); - const uint16x8_t tmp_high_u16 = vmovl_u8(vget_high_u8(data)); - - const int32x4x4_t data_s32 = - { - { - vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(tmp_low_u16))), - vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(tmp_low_u16))), - vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(tmp_high_u16))), - vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(tmp_high_u16))) - } - }; - - const int32x4x2_t out = compute_hor_sobel_x(data_s32); - vst1q_s32(reinterpret_cast(output_x.ptr()), out.val[0]); - vst1q_s32(reinterpret_cast(output_x.ptr()) + 4, out.val[1]); - }, - input, output_x); - } - else if(_run_sobel_y) - { - execute_window_loop(window, [&](const Coordinates &) - { - const uint8x16_t data = vld1q_u8(input.ptr() - 3); - - const uint16x8_t tmp_low_u16 = vmovl_u8(vget_low_u8(data)); - const uint16x8_t tmp_high_u16 = vmovl_u8(vget_high_u8(data)); - - const int32x4x4_t data_s32 = - { - { - vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(tmp_low_u16))), - vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(tmp_low_u16))), - vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(tmp_high_u16))), - vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(tmp_high_u16))) - } - }; - - const int32x4x2_t out = compute_hor_sobel_y(data_s32); - vst1q_s32(reinterpret_cast(output_y.ptr()), out.val[0]); - vst1q_s32(reinterpret_cast(output_y.ptr()) + 4, out.val[1]); - }, - input, output_y); - } -} - -NESobel7x7VertKernel::NESobel7x7VertKernel() - : _input_x(nullptr), _input_y(nullptr), _output_x(nullptr), _output_y(nullptr), _run_sobel_x(false), _run_sobel_y(false) -{ -} - -BorderSize NESobel7x7VertKernel::border_size() const -{ - return BorderSize{ 3, 0 }; -} - -void NESobel7x7VertKernel::configure(const ITensor *input_x, const ITensor *input_y, ITensor *output_x, ITensor *output_y, bool border_undefined) -{ - ARM_COMPUTE_ERROR_ON((output_x == nullptr) && (output_y == nullptr)); - - _run_sobel_x = (output_x != nullptr); - _run_sobel_y = (output_y != nullptr); - - if(_run_sobel_x) - { - ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(input_x, Format::S32); - ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(output_x, Format::S32); - } - - if(_run_sobel_y) - { - ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(input_y, Format::S32); - ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(output_y, Format::S32); - } - - _input_x = input_x; - _input_y = input_y; - _output_x = output_x; - _output_y = output_y; - - const ITensor *const input = _run_sobel_x ? input_x : input_y; - - // Configure kernel window - constexpr unsigned int num_elems_processed_per_iteration = 8; - constexpr unsigned int num_elems_read_per_iteration = 8; - constexpr unsigned int num_elems_written_per_iteration = 8; - constexpr unsigned int num_rows_read_per_iteration = 7; - - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size()); - AccessWindowHorizontal output_x_access(output_x == nullptr ? nullptr : output_x->info(), 0, num_elems_written_per_iteration); - AccessWindowHorizontal output_y_access(output_y == nullptr ? nullptr : output_y->info(), 0, num_elems_written_per_iteration); - - update_window_and_padding(win, - AccessWindowRectangle(input_x == nullptr ? nullptr : input_x->info(), 0, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration), - AccessWindowRectangle(input_y == nullptr ? nullptr : input_y->info(), 0, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration), - output_x_access, - output_y_access); - - output_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - output_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - - INEKernel::configure(win); -} - -void NESobel7x7VertKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - - Iterator input_x; - Iterator input_y; - Iterator output_x; - Iterator output_y; - - int32_t in_x_stride = 0; - int32_t in_y_stride = 0; - - if(_run_sobel_x) - { - input_x = Iterator(_input_x, window); - output_x = Iterator(_output_x, window); - in_x_stride = _input_x->info()->strides_in_bytes()[1] / pixel_size_from_format(_input_x->info()->format()); - } - - if(_run_sobel_y) - { - input_y = Iterator(_input_y, window); - output_y = Iterator(_output_y, window); - in_y_stride = _input_y->info()->strides_in_bytes()[1] / pixel_size_from_format(_input_y->info()->format()); - } - - if(_run_sobel_x) - { - execute_window_loop(window, [&](const Coordinates &) - { - auto in_ptr = reinterpret_cast(input_x.ptr()) - 3 * in_x_stride; - - //top3 - int32x4x2_t data = - { - { - vld1q_s32(in_ptr), - vld1q_s32(in_ptr + 4) - } - }; - - int32x4x2_t out = data; - - //top2 - in_ptr += in_x_stride; - data.val[0] = vld1q_s32(in_ptr); - out.val[0] = vmlaq_s32(out.val[0], data.val[0], six); - - data.val[1] = vld1q_s32(in_ptr + 4); - out.val[1] = vmlaq_s32(out.val[1], data.val[1], six); - - //top - in_ptr += in_x_stride; - data.val[0] = vld1q_s32(in_ptr); - out.val[0] = vmlaq_s32(out.val[0], data.val[0], fifteen); - - data.val[1] = vld1q_s32(in_ptr + 4); - out.val[1] = vmlaq_s32(out.val[1], data.val[1], fifteen); - - //mid - in_ptr += in_x_stride; - data.val[0] = vld1q_s32(in_ptr); - out.val[0] = vmlaq_s32(out.val[0], data.val[0], twenty); - - data.val[1] = vld1q_s32(in_ptr + 4); - out.val[1] = vmlaq_s32(out.val[1], data.val[1], twenty); - - //low - in_ptr += in_x_stride; - data.val[0] = vld1q_s32(in_ptr); - out.val[0] = vmlaq_s32(out.val[0], data.val[0], fifteen); - - data.val[1] = vld1q_s32(in_ptr + 4); - out.val[1] = vmlaq_s32(out.val[1], data.val[1], fifteen); - - //low2 - in_ptr += in_x_stride; - data.val[0] = vld1q_s32(in_ptr); - out.val[0] = vmlaq_s32(out.val[0], data.val[0], six); - - data.val[1] = vld1q_s32(in_ptr + 4); - out.val[1] = vmlaq_s32(out.val[1], data.val[1], six); - - //low3 - in_ptr += in_x_stride; - data.val[0] = vld1q_s32(in_ptr); - out.val[0] = vaddq_s32(out.val[0], data.val[0]); - - data.val[1] = vld1q_s32(in_ptr + 4); - out.val[1] = vaddq_s32(out.val[1], data.val[1]); - - vst1q_s32(reinterpret_cast(output_x.ptr()) + 0, out.val[0]); - vst1q_s32(reinterpret_cast(output_x.ptr()) + 4, out.val[1]); - }, - input_x, output_x); - } - - if(_run_sobel_y) - { - execute_window_loop(window, [&](const Coordinates &) - { - auto in_ptr = reinterpret_cast(input_y.ptr()) - 3 * in_y_stride; - - //top3 - int32x4x2_t data = - { - { - vld1q_s32(in_ptr), - vld1q_s32(in_ptr + 4) - } - }; - - int32x4x2_t out = - { - { - vnegq_s32(data.val[0]), - vnegq_s32(data.val[1]) - } - }; - - //top2 - in_ptr += in_y_stride; - data.val[0] = vld1q_s32(in_ptr); - out.val[0] = vmlaq_s32(out.val[0], data.val[0], minusfour); - - data.val[1] = vld1q_s32(in_ptr + 4); - out.val[1] = vmlaq_s32(out.val[1], data.val[1], minusfour); - - //top - in_ptr += in_y_stride; - data.val[0] = vld1q_s32(in_ptr); - out.val[0] = vmlaq_s32(out.val[0], data.val[0], minusfive); - - data.val[1] = vld1q_s32(in_ptr + 4); - out.val[1] = vmlaq_s32(out.val[1], data.val[1], minusfive); - - //low - in_ptr += (2 * in_y_stride); - data.val[0] = vld1q_s32(in_ptr); - out.val[0] = vmlaq_s32(out.val[0], data.val[0], five); - - data.val[1] = vld1q_s32(in_ptr + 4); - out.val[1] = vmlaq_s32(out.val[1], data.val[1], five); - - //low2 - in_ptr += in_y_stride; - data.val[0] = vld1q_s32(in_ptr); - out.val[0] = vmlaq_s32(out.val[0], data.val[0], four); - - data.val[1] = vld1q_s32(in_ptr + 4); - out.val[1] = vmlaq_s32(out.val[1], data.val[1], four); - - //low3 - in_ptr += in_y_stride; - data.val[0] = vld1q_s32(in_ptr); - out.val[0] = vaddq_s32(out.val[0], data.val[0]); - - data.val[1] = vld1q_s32(in_ptr + 4); - out.val[1] = vaddq_s32(out.val[1], data.val[1]); - - vst1q_s32(reinterpret_cast(output_y.ptr()) + 0, out.val[0]); - vst1q_s32(reinterpret_cast(output_y.ptr()) + 4, out.val[1]); - }, - input_y, output_y); - } -} diff --git a/src/core/NEON/kernels/NESobel7x7Kernel.h b/src/core/NEON/kernels/NESobel7x7Kernel.h deleted file mode 100644 index c5a3899bab..0000000000 --- a/src/core/NEON/kernels/NESobel7x7Kernel.h +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NESOBEL7x7KERNEL_H -#define ARM_COMPUTE_NESOBEL7x7KERNEL_H - -#include "src/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to run the horizontal pass of 7x7 Sobel filter on a tensor. - * - */ -class NESobel7x7HorKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NESobel7x7HorKernel"; - } - /** Default constructor */ - NESobel7x7HorKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESobel7x7HorKernel(const NESobel7x7HorKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESobel7x7HorKernel &operator=(const NESobel7x7HorKernel &) = delete; - /** Allow instances of this class to be moved */ - NESobel7x7HorKernel(NESobel7x7HorKernel &&) = default; - /** Allow instances of this class to be moved */ - NESobel7x7HorKernel &operator=(NESobel7x7HorKernel &&) = default; - /** Default destructor */ - ~NESobel7x7HorKernel() = default; - - /** Initialise the kernel's source, destination and border mode. - * - * @note At least one of output_x or output_y must be set. - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S32. - * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S32. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - const ITensor *_input; /**< Input tensor */ - ITensor *_output_x; /**< X output of horizontal pass */ - ITensor *_output_y; /**< Y output of horizontal pass */ - bool _run_sobel_x; /**< Do we need to run Sobel X? */ - bool _run_sobel_y; /**< Do we need to run Sobel Y? */ - BorderSize _border_size; /**< Border size */ -}; - -/** Interface for the kernel to run the vertical pass of 7x7 Sobel Y filter on a tensor. - * -*/ -class NESobel7x7VertKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NESobel7x7VertKernel"; - } - /** Default constructor */ - NESobel7x7VertKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESobel7x7VertKernel(const NESobel7x7VertKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESobel7x7VertKernel &operator=(const NESobel7x7VertKernel &) = delete; - /** Allow instances of this class to be moved */ - NESobel7x7VertKernel(NESobel7x7VertKernel &&) = default; - /** Allow instances of this class to be moved */ - NESobel7x7VertKernel &operator=(NESobel7x7VertKernel &&) = default; - /** Default destructor */ - ~NESobel7x7VertKernel() = default; - - /** Initialise the kernel's source, destination and border mode. - * - * @note At least one of output_x or output_y must be set - * @note If output_x is set then input_x must be set too - * @note If output_y is set then input_y must be set too - * - * @param[in] input_x (Optional) Input for X (X output of hor pass). Data type supported: S32. - * @param[in] input_y (Optional) Input for Y (Y output of hor pass). Data type supported: S32. - * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S32. - * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S32. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input_x, const ITensor *input_y, ITensor *output_x, ITensor *output_y, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - const ITensor *_input_x; /**< X input (X output of the hor pass) */ - const ITensor *_input_y; /**< Y input (Y output of the hor pass) */ - ITensor *_output_x; /**< X output of sobel */ - ITensor *_output_y; /**< Y output of sobel */ - bool _run_sobel_x; /**< Do we need to run sobel X? */ - bool _run_sobel_y; /**< Do we need to run sobel Y? */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NESOBEL7x7KERNEL_H */ diff --git a/src/core/NEON/kernels/NETableLookupKernel.cpp b/src/core/NEON/kernels/NETableLookupKernel.cpp deleted file mode 100644 index 19ce7f0352..0000000000 --- a/src/core/NEON/kernels/NETableLookupKernel.cpp +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/NEON/kernels/NETableLookupKernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/ILut.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" - -#include -#include - -using namespace arm_compute; - -namespace arm_compute -{ -class Coordinates; - -constexpr unsigned int num_num_elems_processed_per_iteration = 16; -} // namespace arm_compute - -NETableLookupKernel::NETableLookupKernel() - : _func(nullptr), _lut(nullptr) -{ -} - -template -void NETableLookupKernel::tableLookup(const Window &window) -{ - uint32_t offset = _lut->index_offset(); - size_t count = _lut->num_elements(); - const auto lut = reinterpret_cast(_lut->buffer()); - unsigned int step = num_num_elems_processed_per_iteration; - - ARM_COMPUTE_ERROR_ON(lut == nullptr); - - Iterator input = Iterator(_input, window); - Iterator output = Iterator(_output, window); - - execute_window_loop(window, [&](const Coordinates &) - { - auto input_ptr = reinterpret_cast(input.ptr()); - auto output_ptr = reinterpret_cast(output.ptr()); - - for(unsigned int i = 0; i < step; ++i, ++input_ptr, ++output_ptr) - { - const int32_t index = offset + *input_ptr; - - if(0 <= index && index < static_cast(count)) - { - *output_ptr = lut[index]; - } - } - }, - input, output); -} - -namespace arm_compute -{ -template <> -void NETableLookupKernel::tableLookup(const Window &window) -{ - const uint8_t *const lut = _lut->buffer(); - unsigned int step = num_num_elems_processed_per_iteration; - - ARM_COMPUTE_ERROR_ON(lut == nullptr); - - Iterator input = Iterator(_input, window); - Iterator output = Iterator(_output, window); - - execute_window_loop(window, [&](const Coordinates &) - { - const uint8_t *input_ptr = input.ptr(); - uint8_t *output_ptr = output.ptr(); - - for(unsigned int i = 0; i < step; ++i) - { - *output_ptr++ = lut[*input_ptr++]; - } - }, - input, output); -} -} // namespace arm_compute - -void NETableLookupKernel::configure(const ITensor *input, const ILut *lut, ITensor *output) -{ - ARM_COMPUTE_ERROR_ON(input == nullptr); - ARM_COMPUTE_ERROR_ON(lut == nullptr); - ARM_COMPUTE_ERROR_ON(output == nullptr); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - - _lut = lut; - - if(input->info()->data_type() == DataType::U8 && output->info()->data_type() == DataType::U8) - { - _func = &NETableLookupKernel::tableLookup; - } - else if(input->info()->data_type() == DataType::S16 && output->info()->data_type() == DataType::S16) - { - _func = &NETableLookupKernel::tableLookup; - } - else - { - ARM_COMPUTE_ERROR("Unsupported combination of input and output DataType."); - } - - INESimpleKernel::configure(input, output, num_num_elems_processed_per_iteration); -} - -void NETableLookupKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INESimpleKernel::window(), window); - ARM_COMPUTE_ERROR_ON(_func == nullptr); - (this->*_func)(window); -} diff --git a/src/core/NEON/kernels/NETableLookupKernel.h b/src/core/NEON/kernels/NETableLookupKernel.h deleted file mode 100644 index 7937999b46..0000000000 --- a/src/core/NEON/kernels/NETableLookupKernel.h +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NETABLELOOKUPKERNEL_H -#define ARM_COMPUTE_NETABLELOOKUPKERNEL_H - -#include "src/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -class ITensor; -class ILut; - -/** Interface for the kernel to perform table lookup calculations. */ -class NETableLookupKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NETableLookupKernel"; - } - /** Default constructor */ - NETableLookupKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NETableLookupKernel(const NETableLookupKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NETableLookupKernel &operator=(const NETableLookupKernel &) = delete; - /** Allow instances of this class to be moved */ - NETableLookupKernel(NETableLookupKernel &&) = default; - /** Allow instances of this class to be moved */ - NETableLookupKernel &operator=(NETableLookupKernel &&) = default; - /** Default destructor */ - ~NETableLookupKernel() = default; - /** Initialise the kernel's input, lut and output. - * - * @param[in] input An input tensor. Data types supported: U8/S16. - * @param[in] lut The input LUT. - * @param[out] output The output tensor. Data types supported: same as @p input - */ - void configure(const ITensor *input, const ILut *lut, ITensor *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Perform table lookup on a given window. - * - * @param window window Region on which to execute the kernel. - */ - template - void tableLookup(const Window &window); - /** Common signature for all the specialised lut functions - * - * @param[in] window Region on which to execute the kernel. - */ - using TableLookupFunction = void (NETableLookupKernel::*)(const Window &window); - /** Sub function to use for the particular tensor types passed to configure() */ - TableLookupFunction _func; - const ILut *_lut; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NETABLELOOKUPKERNEL_H */ diff --git a/src/core/NEON/kernels/NEThresholdKernel.cpp b/src/core/NEON/kernels/NEThresholdKernel.cpp deleted file mode 100644 index 108f29f377..0000000000 --- a/src/core/NEON/kernels/NEThresholdKernel.cpp +++ /dev/null @@ -1,217 +0,0 @@ -/* - * Copyright (c) 2016-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/NEON/kernels/NEThresholdKernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/Validate.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" - -#include "src/core/NEON/wrapper/wrapper.h" - -namespace arm_compute -{ -namespace -{ -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const ThresholdKernelInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); - - // Checks performed when output is configured - if((output != nullptr) && (output->total_size() != 0)) - { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - } - - return Status{}; -} - -std::pair validate_and_configure_window(ITensorInfo *input, ITensorInfo *output) -{ - // Configure kernel window - Window win = calculate_max_window(*input, Steps()); - - // Output auto inizialitation if not yet initialized - auto_init_if_empty(*output, *input->clone()); - - // NEThresholdKernel doesn't need padding so update_window_and_padding() can be skipped - Coordinates coord; - coord.set_num_dimensions(output->num_dimensions()); - output->set_valid_region(ValidRegion(coord, output->tensor_shape())); - - return std::make_pair(Status{}, win); -} -} // namespace - -NEThresholdKernel::NEThresholdKernel() - : _func(nullptr), _input(nullptr), _output(nullptr), _info() -{ -} - -void NEThresholdKernel::configure(const ITensor *input, ITensor *output, const ThresholdKernelInfo &info) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), output->info(), info)); - - _input = input; - _output = output; - _info = info; - - switch(_info.type) - { - case ThresholdType::BINARY: - _func = &NEThresholdKernel::run_binary; - break; - case ThresholdType::RANGE: - _func = &NEThresholdKernel::run_range; - break; - default: - ARM_COMPUTE_ERROR("Thresholding type not recognized"); - break; - } - - // Configure kernel window - auto win_config = validate_and_configure_window(input->info(), output->info()); - ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICPPKernel::configure(win_config.second); -} - -Status NEThresholdKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ThresholdKernelInfo &info) -{ - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, info)); - ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get()).first); - - return Status{}; -} - -inline void NEThresholdKernel::run_binary(const Window &window) -{ - /** Neon vector tag type. */ - using Type = uint8_t; - using ExactTagType = typename wrapper::traits::neon_bitvector_tag_t; - - const int window_step_x = 16 / sizeof(Type); - const auto window_start_x = static_cast(window.x().start()); - const auto window_end_x = static_cast(window.x().end()); - - Window win_collapsed = window.collapse_if_possible(window, Window::DimZ); - win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1)); - - const uint8_t threshold = _info.threshold; - const uint8_t true_value = _info.true_value; - const uint8_t false_value = _info.false_value; - - const auto vthreshold = wrapper::vdup_n(threshold, ExactTagType{}); - const auto vtrue_value = wrapper::vdup_n(true_value, ExactTagType{}); - const auto vfalse_value = wrapper::vdup_n(false_value, ExactTagType{}); - - Iterator input(_input, win_collapsed); - Iterator output(_output, win_collapsed); - - execute_window_loop(win_collapsed, [&](const Coordinates &) - { - const auto input_ptr = reinterpret_cast(input.ptr()); - const auto output_ptr = reinterpret_cast(output.ptr()); - - int x = window_start_x; - for(; x <= (window_end_x - window_step_x); x += window_step_x) - { - const auto vdata = wrapper::vloadq(input_ptr + x); - const auto vmask = wrapper::vcgt(vdata, vthreshold); - wrapper::vstore(output_ptr + x, wrapper::vbsl(vmask, vtrue_value, vfalse_value)); - } - - for(; x < window_end_x; ++x) - { - const Type data = *(reinterpret_cast(input_ptr + x)); - *(output_ptr + x) = (data > threshold) ? true_value : false_value; - } - }, - input, output); -} - -inline void NEThresholdKernel::run_range(const Window &window) -{ - /** Neon vector tag type. */ - using Type = uint8_t; - using ExactTagType = typename wrapper::traits::neon_bitvector_tag_t; - - const int window_step_x = 16 / sizeof(Type); - const auto window_start_x = static_cast(window.x().start()); - const auto window_end_x = static_cast(window.x().end()); - - Window win_collapsed = window.collapse_if_possible(window, Window::DimZ); - win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1)); - - const uint8_t lower_threshold = _info.threshold; - const uint8_t upper_threshold = _info.upper; - const uint8_t true_value = _info.true_value; - const uint8_t false_value = _info.false_value; - - const auto vlower_threshold = wrapper::vdup_n(lower_threshold, ExactTagType{}); - const auto vupper_threshold = wrapper::vdup_n(upper_threshold, ExactTagType{}); - const auto vtrue_value = wrapper::vdup_n(true_value, ExactTagType{}); - const auto vfalse_value = wrapper::vdup_n(false_value, ExactTagType{}); - - Iterator input(_input, win_collapsed); - Iterator output(_output, win_collapsed); - - execute_window_loop(win_collapsed, [&](const Coordinates &) - { - const auto input_ptr = reinterpret_cast(input.ptr()); - const auto output_ptr = reinterpret_cast(output.ptr()); - - int x = window_start_x; - for(; x <= (window_end_x - window_step_x); x += window_step_x) - { - const auto vdata = wrapper::vloadq(input_ptr + x); - auto vmask = wrapper::vcle(vdata, vupper_threshold); - vmask = wrapper::vand(wrapper::vcge(vdata, vlower_threshold), vmask); - wrapper::vstore(output_ptr + x, wrapper::vbsl(vmask, vtrue_value, vfalse_value)); - } - - for(; x < window_end_x; ++x) - { - const Type data = *(reinterpret_cast(input_ptr + x)); - *(output_ptr + x) = (data <= upper_threshold && data >= lower_threshold) ? true_value : false_value; - } - }, - input, output); -} - -void NEThresholdKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - ARM_COMPUTE_ERROR_ON(_func == nullptr); - - (this->*_func)(window); -} -} // namespace arm_compute diff --git a/src/core/NEON/kernels/NEThresholdKernel.h b/src/core/NEON/kernels/NEThresholdKernel.h deleted file mode 100644 index 6b3b3866b0..0000000000 --- a/src/core/NEON/kernels/NEThresholdKernel.h +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NETHRESHOLDKERNEL_H -#define ARM_COMPUTE_NETHRESHOLDKERNEL_H - -#include "arm_compute/core/KernelDescriptors.h" -#include "arm_compute/core/Types.h" -#include "src/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the thresholding kernel */ -class NEThresholdKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEThresholdKernel"; - } - /** Default constructor */ - NEThresholdKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEThresholdKernel(const NEThresholdKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEThresholdKernel &operator=(const NEThresholdKernel &) = delete; - /** Allow instances of this class to be moved */ - NEThresholdKernel(NEThresholdKernel &&) = default; - /** Allow instances of this class to be moved */ - NEThresholdKernel &operator=(NEThresholdKernel &&) = default; - /** Default destructor */ - ~NEThresholdKernel() = default; - /** Initialise the kernel's input, output and threshold parameters. - * - * @param[in] input An input tensor. Data type supported: U8 - * @param[out] output The output tensor. Data type supported: U8. - * @param[in] info Threshold kernel descriptor - */ - void configure(const ITensor *input, ITensor *output, const ThresholdKernelInfo &info); - /** Static function to check if given info will lead to a valid configuration of @ref NEThresholdKernel - * - * @param[in] input Input tensor info. Data type supported: U8 - * @param[in] output Output tensor info. Data type supported: U8 - * @param[in] info Threshold kernel descriptor - * - * @return A status containing an error code in case of failure - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ThresholdKernelInfo &info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** run binary thresholding on the given window */ - void run_binary(const Window &window); - /** run range thresholding on the given window */ - void run_range(const Window &window); - - void (NEThresholdKernel::*_func)(const Window &window); - - const ITensor *_input; /**< Input */ - ITensor *_output; /**< Output */ - ThresholdKernelInfo _info; /**< Threshold descriptor */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NETHRESHOLDKERNEL_H */ diff --git a/src/core/NEON/kernels/NEWarpKernel.cpp b/src/core/NEON/kernels/NEWarpKernel.cpp deleted file mode 100644 index 1ae076153b..0000000000 --- a/src/core/NEON/kernels/NEWarpKernel.cpp +++ /dev/null @@ -1,807 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/NEON/kernels/NEWarpKernel.h" - -#include "arm_compute/core/Coordinates.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" -#include "src/core/AccessWindowStatic.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/ScaleHelpers.h" -#include "src/core/helpers/WindowHelpers.h" - -#include - -using namespace arm_compute; - -namespace -{ -inline uint8_t nearest_interpolation(const uint8_t *in_ptr, int x, int y, size_t stride) -{ - return in_ptr[x + y * stride]; -} -} // namespace - -INEWarpKernel::INEWarpKernel() - : _func(nullptr), _input(nullptr), _output(nullptr), _constant_border_value(0), _matrix() -{ -} - -BorderSize INEWarpKernel::border_size() const -{ - return BorderSize(1); -} - -void INEWarpKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - ARM_COMPUTE_ERROR_ON(_func == nullptr); - - (this->*_func)(window); -} - -void INEWarpKernel::configure(const ITensor *input, ITensor *output, const std::array &matrix, BorderMode border_mode, uint8_t constant_border_value) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); - - _matrix = matrix; - _constant_border_value = constant_border_value; - - switch(border_mode) - { - case BorderMode::UNDEFINED: - _func = &INEWarpKernel::warp_undefined; - break; - case BorderMode::CONSTANT: - _func = &INEWarpKernel::warp_constant; - break; - case BorderMode::REPLICATE: - _func = &INEWarpKernel::warp_replicate; - break; - default: - ARM_COMPUTE_ERROR("Border mode not supported"); - break; - } - - _input = input; - _output = output; - - // Configure kernel window - Window win = calculate_max_window(*output->info(), Steps(1U)); - - const ValidRegion &input_valid_region = input->info()->valid_region(); - - // Reads can occur within the valid region of the input - AccessWindowStatic input_access(input->info(), - input_valid_region.anchor[0] - border_size().left, input_valid_region.anchor[1] - border_size().top, - input_valid_region.anchor[0] + input_valid_region.shape[0] + border_size().right, - input_valid_region.anchor[1] + input_valid_region.shape[1] + border_size().bottom); - AccessWindowHorizontal output_access(output->info(), 0, 1); - - update_window_and_padding(win, input_access, output_access); - - output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape())); - - INEKernel::configure(win); -} - -template -void NEWarpAffineKernel::warp_undefined(const Window &window) -{ - // Don't increment in X and Y direction for the input tensor - // A pointer to the start of this plane is needed as base for the precomputed offsets - Window win_in(window); - win_in.set(Window::DimX, Window::Dimension(0, 0, 0)); - win_in.set(Window::DimY, Window::Dimension(0, 0, 0)); - - Iterator in(_input, win_in); - Iterator out(_output, window); - - const int min_x = _input->info()->valid_region().anchor[0]; - const int max_x = min_x + _input->info()->valid_region().shape[0]; - const int min_y = _input->info()->valid_region().anchor[1]; - const int max_y = min_y + _input->info()->valid_region().shape[1]; - const size_t stride = _input->info()->strides_in_bytes()[1]; - - // x0 = M01 * x + M01 * y + M02 - // y0 = M11 * x + M11 * y + M12 - const float M00 = _matrix[0]; - const float M10 = _matrix[1]; - const float M01 = _matrix[0 + 1 * 2]; - const float M11 = _matrix[1 + 1 * 2]; - const float M02 = _matrix[0 + 2 * 2]; - const float M12 = _matrix[1 + 2 * 2]; - - // "M00 * x" and "M10 * x", when x = window.x.start - const float start_x0 = M00 * window.x().start(); - const float start_y0 = M10 * window.x().start(); - - // Current row - int y_cur = window.y().start(); - int z_cur = window.z().start(); - int d3_cur = window[3].start(); - int d4_cur = window[4].start(); - int d5_cur = window[5].start(); - - // const_x0 and const_y0 are the constant parts of x0 and y0 during the row processing - float const_x0 = M01 * y_cur + M02; - float const_y0 = M11 * y_cur + M12; - - // Affine warp coordinates - float x0 = start_x0 + const_x0; - float y0 = start_y0 + const_y0; - - execute_window_loop(window, [&](const Coordinates & id) - { - // Check if we are processing a new row. If so, update the current processed row (y_cur), x0, y0 and z0 - if((y_cur != id.y()) || (z_cur != id.z()) || (d3_cur != id[3]) || (d4_cur != id[4]) || (d5_cur != id[5])) - { - y_cur = id.y(); - z_cur = id.z(); - d3_cur = id[3]; - d4_cur = id[4]; - d5_cur = id[5]; - - const_x0 = M01 * y_cur + M02; - const_y0 = M11 * y_cur + M12; - - x0 = start_x0 + const_x0; - y0 = start_y0 + const_y0; - } - - // Only write to output if x0 and y0 are within the valid region. - // Otherwise the read value would be undefined. - if((min_y <= y0) && (y0 < max_y) && (min_x <= x0) && (x0 < max_x)) - { - switch(interpolation) - { - case InterpolationPolicy::NEAREST_NEIGHBOR: - *out.ptr() = nearest_interpolation(in.ptr(), x0, y0, stride); - break; - case InterpolationPolicy::BILINEAR: - *out.ptr() = scale_helpers::pixel_bilinear_c1(in.ptr(), stride, x0, y0); - break; - default: - ARM_COMPUTE_ERROR("Interpolation not supported"); - } - } - - x0 += M00; - y0 += M10; - }, - in, out); -} - -template -void NEWarpAffineKernel::warp_constant(const Window &window) -{ - // Don't increment in X and Y direction for the input tensor - // A pointer to the start of this plane is needed as base for the precomputed offsets - Window win_in(window); - win_in.set(Window::DimX, Window::Dimension(0, 0, 0)); - win_in.set(Window::DimY, Window::Dimension(0, 0, 0)); - - Iterator in(_input, win_in); - Iterator out(_output, window); - - const int min_x = _input->info()->valid_region().anchor[0]; - const int max_x = min_x + _input->info()->valid_region().shape[0]; - const int min_y = _input->info()->valid_region().anchor[1]; - const int max_y = min_y + _input->info()->valid_region().shape[1]; - const size_t stride = _input->info()->strides_in_bytes()[1]; - - // x0 = M01 * x + M01 * y + M02 - // y0 = M11 * x + M11 * y + M12 - const float M00 = _matrix[0]; - const float M10 = _matrix[1]; - const float M01 = _matrix[0 + 1 * 2]; - const float M11 = _matrix[1 + 1 * 2]; - const float M02 = _matrix[0 + 2 * 2]; - const float M12 = _matrix[1 + 2 * 2]; - - // "M00 * x" and "M10 * x", when x = window.x.start - const float start_x0 = M00 * window.x().start(); - const float start_y0 = M10 * window.x().start(); - - // Current row - int y_cur = window.y().start(); - int z_cur = window.z().start(); - int d3_cur = window[3].start(); - int d4_cur = window[4].start(); - int d5_cur = window[5].start(); - - // const_x0 and const_y0 are the constant parts of x0 and y0 during the row processing - float const_x0 = M01 * y_cur + M02; - float const_y0 = M11 * y_cur + M12; - - // Affine warp coordinates - float x0 = start_x0 + const_x0; - float y0 = start_y0 + const_y0; - - execute_window_loop(window, [&](const Coordinates & id) - { - // Check if we are processing a new row. If so, update the current processed row (y_cur), x0, y0 and z0 - if((y_cur != id.y()) || (z_cur != id.z()) || (d3_cur != id[3]) || (d4_cur != id[4]) || (d5_cur != id[5])) - { - y_cur = id.y(); - z_cur = id.z(); - d3_cur = id[3]; - d4_cur = id[4]; - d5_cur = id[5]; - - const_x0 = M01 * y_cur + M02; - const_y0 = M11 * y_cur + M12; - - x0 = start_x0 + const_x0; - y0 = start_y0 + const_y0; - } - - // Only use input values if x0 and y0 are within the valid region. - // Otherwise write the constant border value. - if((min_y <= y0) && (y0 < max_y) && (min_x <= x0) && (x0 < max_x)) - { - switch(interpolation) - { - case InterpolationPolicy::NEAREST_NEIGHBOR: - *out.ptr() = nearest_interpolation(in.ptr(), x0, y0, stride); - break; - case InterpolationPolicy::BILINEAR: - *out.ptr() = scale_helpers::pixel_bilinear_c1(in.ptr(), stride, x0, y0); - break; - default: - ARM_COMPUTE_ERROR("Interpolation not supported"); - } - } - else - { - switch(interpolation) - { - case InterpolationPolicy::NEAREST_NEIGHBOR: - *out.ptr() = _constant_border_value; - break; - case InterpolationPolicy::BILINEAR: - { - const auto xi = utility::clamp(std::floor(x0), min_x - 1, max_x); - const auto yi = utility::clamp(std::floor(y0), min_y - 1, max_y); - const auto xi_1 = utility::clamp(std::floor(x0 + 1), min_x - 1, max_x); - const auto yi_1 = utility::clamp(std::floor(y0 + 1), min_y - 1, max_y); - - const float dx = x0 - std::floor(x0); - const float dy = y0 - std::floor(y0); - const float dx1 = 1.0f - dx; - const float dy1 = 1.0f - dy; - - const float a00 = *(in.ptr() + xi + yi * stride); - const float a01 = *(in.ptr() + xi_1 + yi * stride); - const float a10 = *(in.ptr() + xi + yi_1 * stride); - const float a11 = *(in.ptr() + xi_1 + yi_1 * stride); - - *out.ptr() = a00 * (dx1 * dy1) + a01 * (dx * dy1) + a10 * (dx1 * dy) + a11 * (dx * dy); - } - break; - default: - ARM_COMPUTE_ERROR("Interpolation not supported"); - } - } - - x0 += M00; - y0 += M10; - }, - in, out); -} - -template -void NEWarpAffineKernel::warp_replicate(const Window &window) -{ - // Don't increment in X and Y direction for the input tensor - // A pointer to the start of this plane is needed as base for the precomputed offsets - Window win_in(window); - win_in.set(Window::DimX, Window::Dimension(0, 0, 0)); - win_in.set(Window::DimY, Window::Dimension(0, 0, 0)); - - Iterator in(_input, win_in); - Iterator out(_output, window); - - const int min_x = _input->info()->valid_region().anchor[0]; - const int max_x = min_x + _input->info()->valid_region().shape[0]; - const int min_y = _input->info()->valid_region().anchor[1]; - const int max_y = min_y + _input->info()->valid_region().shape[1]; - const size_t stride = _input->info()->strides_in_bytes()[1]; - - // Current row - int y_cur = window.y().start(); - int z_cur = window.z().start(); - int d3_cur = window[3].start(); - int d4_cur = window[4].start(); - int d5_cur = window[5].start(); - - const float M00 = _matrix[0]; - const float M10 = _matrix[1]; - const float M01 = _matrix[0 + 1 * 2]; - const float M11 = _matrix[1 + 1 * 2]; - const float M02 = _matrix[0 + 2 * 2]; - const float M12 = _matrix[1 + 2 * 2]; - - // "M00 * x" and "M10 * x", when x = window.x.start - const float start_x0 = M00 * window.x().start(); - const float start_y0 = M10 * window.x().start(); - - // const_x0 and const_y0 are the constant parts of x0 and y0 during the row processing - float const_x0 = M01 * y_cur + M02; - float const_y0 = M11 * y_cur + M12; - - float x0 = start_x0 + const_x0; - float y0 = start_y0 + const_y0; - - execute_window_loop(window, [&](const Coordinates & id) - { - // Check if we are processing a new row. If so, update the current processed row (y_cur), x0, y0 and z0 - if((y_cur != id.y()) || (z_cur != id.z()) || (d3_cur != id[3]) || (d4_cur != id[4]) || (d5_cur != id[5])) - { - y_cur = id.y(); - z_cur = id.z(); - d3_cur = id[3]; - d4_cur = id[4]; - d5_cur = id[5]; - - const_x0 = M01 * y_cur + M02; - const_y0 = M11 * y_cur + M12; - - x0 = start_x0 + const_x0; - y0 = start_y0 + const_y0; - } - - // Only load from (x0, y0) if the point is within the valid region. - // Otherwise load from the edge of the valid region. - if((min_y <= y0) && (y0 < max_y) && (min_x <= x0) && (x0 < max_x)) - { - switch(interpolation) - { - case InterpolationPolicy::NEAREST_NEIGHBOR: - *out.ptr() = nearest_interpolation(in.ptr(), x0, y0, stride); - break; - case InterpolationPolicy::BILINEAR: - *out.ptr() = scale_helpers::pixel_bilinear_c1(in.ptr(), stride, x0, y0); - break; - default: - ARM_COMPUTE_ERROR("Interpolation not supported"); - } - } - else - { - // Clamp coordinates - const auto xi = utility::clamp(std::floor(x0), min_x, max_x - 1); - const auto yi = utility::clamp(std::floor(y0), min_y, max_y - 1); - switch(interpolation) - { - case InterpolationPolicy::NEAREST_NEIGHBOR: - *out.ptr() = *(in.ptr() + xi + yi * stride); - break; - case InterpolationPolicy::BILINEAR: - { - const auto xi_1 = utility::clamp(std::floor(x0 + 1), min_x, max_x - 1); - const auto yi_1 = utility::clamp(std::floor(y0 + 1), min_y, max_y - 1); - - const float dx = x0 - std::floor(x0); - const float dy = y0 - std::floor(y0); - const float dx1 = 1.0f - dx; - const float dy1 = 1.0f - dy; - - const float a00 = *(in.ptr() + xi + yi * stride); - const float a01 = *(in.ptr() + xi_1 + yi * stride); - const float a10 = *(in.ptr() + xi + yi_1 * stride); - const float a11 = *(in.ptr() + xi_1 + yi_1 * stride); - - *out.ptr() = a00 * (dx1 * dy1) + a01 * (dx * dy1) + a10 * (dx1 * dy) + a11 * (dx * dy); - } - break; - default: - ARM_COMPUTE_ERROR("Interpolation not supported"); - } - } - - x0 += M00; - y0 += M10; - }, - in, out); -} - -template -void NEWarpPerspectiveKernel::warp_undefined(const Window &window) -{ - // Don't increment in X and Y direction for the input tensor - // A pointer to the start of this plane is needed as base for the precomputed offsets - Window win_in(window); - win_in.set(Window::DimX, Window::Dimension(0, 0, 0)); - win_in.set(Window::DimY, Window::Dimension(0, 0, 0)); - - Iterator in(_input, win_in); - Iterator out(_output, window); - - const int min_x = _input->info()->valid_region().anchor[0]; - const int max_x = min_x + _input->info()->valid_region().shape[0]; - const int min_y = _input->info()->valid_region().anchor[1]; - const int max_y = min_y + _input->info()->valid_region().shape[1]; - const size_t stride = _input->info()->strides_in_bytes()[1]; - - // x0 = M00 * x + M01 * y + M02 - // y0 = M10 * x + M11 * y + M12 - // z0 = M20 * x + M21 * y + M22 - // xn = x0 / z0 - // yn = y0 / z0 - const float M00 = _matrix[0]; - const float M10 = _matrix[1]; - const float M20 = _matrix[2]; - const float M01 = _matrix[0 + 1 * 3]; - const float M11 = _matrix[1 + 1 * 3]; - const float M21 = _matrix[2 + 1 * 3]; - const float M02 = _matrix[0 + 2 * 3]; - const float M12 = _matrix[1 + 2 * 3]; - const float M22 = _matrix[2 + 2 * 3]; - - // "M00 * x", "M10 * x" and "M20 * x", when x = window.x.start - const float start_x0 = M00 * window.x().start(); - const float start_y0 = M10 * window.x().start(); - const float start_z0 = M20 * window.x().start(); - - // Current row - int y_cur = window.y().start(); - int z_cur = window.z().start(); - int d3_cur = window[3].start(); - int d4_cur = window[4].start(); - int d5_cur = window[5].start(); - - // const_x0, const_y0 and const_z0 are the constant parts of x0, y0 and z0 during the row processing - float const_x0 = M01 * y_cur + M02; - float const_y0 = M11 * y_cur + M12; - float const_z0 = M21 * y_cur + M22; - - // Perspective warp coordinates - float x0 = start_x0 + const_x0; - float y0 = start_y0 + const_y0; - float z0 = start_z0 + const_z0; - - execute_window_loop(window, [&](const Coordinates & id) - { - // Check if we are processing a new row. If so, update the current processed row (y_cur), x0, y0 and z0 - if((y_cur != id.y()) || (z_cur != id.z()) || (d3_cur != id[3]) || (d4_cur != id[4]) || (d5_cur != id[5])) - { - y_cur = id.y(); - z_cur = id.z(); - d3_cur = id[3]; - d4_cur = id[4]; - d5_cur = id[5]; - - const_x0 = M01 * y_cur + M02; - const_y0 = M11 * y_cur + M12; - const_z0 = M21 * y_cur + M22; - - x0 = start_x0 + const_x0; - y0 = start_y0 + const_y0; - z0 = start_z0 + const_z0; - } - - const float xn = x0 / z0; - const float yn = y0 / z0; - - // Only write to output if xn and yn are within the valid region. - // Otherwise the read value would be undefined. - if((min_y <= yn) && (yn < max_y) && (min_x <= xn) && (xn < max_x)) - { - switch(interpolation) - { - case InterpolationPolicy::NEAREST_NEIGHBOR: - *out.ptr() = nearest_interpolation(in.ptr(), xn, yn, stride); - break; - case InterpolationPolicy::BILINEAR: - *out.ptr() = scale_helpers::pixel_bilinear_c1(in.ptr(), stride, xn, yn); - break; - default: - ARM_COMPUTE_ERROR("Interpolation not supported"); - } - } - - x0 += M00; - y0 += M10; - z0 += M20; - }, - in, out); -} - -template -void NEWarpPerspectiveKernel::warp_constant(const Window &window) -{ - // Don't increment in X and Y direction for the input tensor - // A pointer to the start of this plane is needed as base for the precomputed offsets - Window win_in(window); - win_in.set(Window::DimX, Window::Dimension(0, 0, 0)); - win_in.set(Window::DimY, Window::Dimension(0, 0, 0)); - - Iterator in(_input, win_in); - Iterator out(_output, window); - - const int min_x = _input->info()->valid_region().anchor[0]; - const int max_x = min_x + _input->info()->valid_region().shape[0]; - const int min_y = _input->info()->valid_region().anchor[1]; - const int max_y = min_y + _input->info()->valid_region().shape[1]; - const size_t stride = _input->info()->strides_in_bytes()[1]; - - // x0 = M00 * x + M01 * y + M02 - // y0 = M10 * x + M11 * y + M12 - // z0 = M20 * x + M21 * y + M22 - // xn = x0 / z0 - // yn = y0 / z0 - const float M00 = _matrix[0]; - const float M10 = _matrix[1]; - const float M20 = _matrix[2]; - const float M01 = _matrix[0 + 1 * 3]; - const float M11 = _matrix[1 + 1 * 3]; - const float M21 = _matrix[2 + 1 * 3]; - const float M02 = _matrix[0 + 2 * 3]; - const float M12 = _matrix[1 + 2 * 3]; - const float M22 = _matrix[2 + 2 * 3]; - - // "M00 * x", "M10 * x" and "M20 * x", when x = window.x.start - const float start_x0 = M00 * window.x().start(); - const float start_y0 = M10 * window.x().start(); - const float start_z0 = M20 * window.x().start(); - - // Current row - int y_cur = window.y().start(); - int z_cur = window.z().start(); - int d3_cur = window[3].start(); - int d4_cur = window[4].start(); - int d5_cur = window[5].start(); - - // const_x0, const_y0 and const_z0 are the constant parts of x0, y0 and z0 during the row processing - float const_x0 = M01 * y_cur + M02; - float const_y0 = M11 * y_cur + M12; - float const_z0 = M21 * y_cur + M22; - - // Perspective warp coordinates - float x0 = start_x0 + const_x0; - float y0 = start_y0 + const_y0; - float z0 = start_z0 + const_z0; - - execute_window_loop(window, [&](const Coordinates & id) - { - // Check if we are processing a new row. If so, update the current processed row (y_cur), x0, y0 and z0 - if((y_cur != id.y()) || (z_cur != id.z()) || (d3_cur != id[3]) || (d4_cur != id[4]) || (d5_cur != id[5])) - { - y_cur = id.y(); - z_cur = id.z(); - d3_cur = id[3]; - d4_cur = id[4]; - d5_cur = id[5]; - - const_x0 = M01 * y_cur + M02; - const_y0 = M11 * y_cur + M12; - const_z0 = M21 * y_cur + M22; - - x0 = start_x0 + const_x0; - y0 = start_y0 + const_y0; - z0 = start_z0 + const_z0; - } - - const float xn = x0 / z0; - const float yn = y0 / z0; - - // Only use input values if xn and yn are within the valid region. - if((min_y <= yn) && (yn < max_y) && (min_x <= xn) && (xn < max_x)) - { - switch(interpolation) - { - case InterpolationPolicy::NEAREST_NEIGHBOR: - *out.ptr() = nearest_interpolation(in.ptr(), xn, yn, stride); - break; - case InterpolationPolicy::BILINEAR: - *out.ptr() = scale_helpers::pixel_bilinear_c1(in.ptr(), stride, xn, yn); - break; - default: - ARM_COMPUTE_ERROR("Interpolation not supported"); - } - } - else - { - switch(interpolation) - { - case InterpolationPolicy::NEAREST_NEIGHBOR: - *out.ptr() = _constant_border_value; - break; - case InterpolationPolicy::BILINEAR: - { - const auto xi = utility::clamp(std::floor(xn), min_x - 1, max_x); - const auto yi = utility::clamp(std::floor(yn), min_y - 1, max_y); - const auto xi_1 = utility::clamp(std::floor(xn + 1), min_x - 1, max_x); - const auto yi_1 = utility::clamp(std::floor(yn + 1), min_y - 1, max_y); - - const float dx = xn - std::floor(xn); - const float dy = yn - std::floor(yn); - const float dx1 = 1.0f - dx; - const float dy1 = 1.0f - dy; - - const float a00 = *(in.ptr() + xi + yi * stride); - const float a01 = *(in.ptr() + xi_1 + yi * stride); - const float a10 = *(in.ptr() + xi + yi_1 * stride); - const float a11 = *(in.ptr() + xi_1 + yi_1 * stride); - - *out.ptr() = a00 * (dx1 * dy1) + a01 * (dx * dy1) + a10 * (dx1 * dy) + a11 * (dx * dy); - } - break; - default: - ARM_COMPUTE_ERROR("Interpolation not supported"); - } - } - - x0 += M00; - y0 += M10; - z0 += M20; - }, - in, out); -} - -template -void NEWarpPerspectiveKernel::warp_replicate(const Window &window) -{ - // Don't increment in X and Y direction for the input tensor - // A pointer to the start of this plane is needed as base for the precomputed offsets - Window win_in(window); - win_in.set(Window::DimX, Window::Dimension(0, 0, 0)); - win_in.set(Window::DimY, Window::Dimension(0, 0, 0)); - - Iterator in(_input, win_in); - Iterator out(_output, window); - - const int min_x = _input->info()->valid_region().anchor[0]; - const int max_x = min_x + _input->info()->valid_region().shape[0]; - const int min_y = _input->info()->valid_region().anchor[1]; - const int max_y = min_y + _input->info()->valid_region().shape[1]; - const size_t stride = _input->info()->strides_in_bytes()[1]; - - // Current row - int y_cur = window.y().start(); - int z_cur = window.z().start(); - int d3_cur = window[3].start(); - int d4_cur = window[4].start(); - int d5_cur = window[5].start(); - - // x0 = M00 * x + M01 * y + M02 - // y0 = M10 * x + M11 * y + M12 - // z0 = M20 * x + M21 * y + M22 - // xn = x0 / z0 - // yn = y0 / z0 - const float M00 = _matrix[0]; - const float M10 = _matrix[1]; - const float M20 = _matrix[2]; - const float M01 = _matrix[0 + 1 * 3]; - const float M11 = _matrix[1 + 1 * 3]; - const float M21 = _matrix[2 + 1 * 3]; - const float M02 = _matrix[0 + 2 * 3]; - const float M12 = _matrix[1 + 2 * 3]; - const float M22 = _matrix[2 + 2 * 3]; - - // "M00 * x", "M10 * x" and "M20 * x", when x = window.x.start - const float start_x0 = M00 * window.x().start(); - const float start_y0 = M10 * window.x().start(); - const float start_z0 = M20 * window.x().start(); - - // const_x0, const_y0 and const_z0 are the constant parts of x0, y0 and z0 during the row processing - float const_x0 = M01 * y_cur + M02; - float const_y0 = M11 * y_cur + M12; - float const_z0 = M21 * y_cur + M22; - - // Perspective warp coordinates - float x0 = start_x0 + const_x0; - float y0 = start_y0 + const_y0; - float z0 = start_z0 + const_z0; - - execute_window_loop(window, [&](const Coordinates & id) - { - // Check if we are processing a new row. If so, update the current processed row (y_cur), x0, y0 and z0 - if((y_cur != id.y()) || (z_cur != id.z()) || (d3_cur != id[3]) || (d4_cur != id[4]) || (d5_cur != id[5])) - { - y_cur = id.y(); - z_cur = id.z(); - d3_cur = id[3]; - d4_cur = id[4]; - d5_cur = id[5]; - - const_x0 = M01 * y_cur + M02; - const_y0 = M11 * y_cur + M12; - const_z0 = M21 * y_cur + M22; - - x0 = start_x0 + const_x0; - y0 = start_y0 + const_y0; - z0 = start_z0 + const_z0; - } - - const float xn = x0 / z0; - const float yn = y0 / z0; - - // Only load from (x0, y0) if the point is within the valid region. - if((min_y <= yn) && (yn < max_y) && (min_x <= xn) && (xn < max_x)) - { - switch(interpolation) - { - case InterpolationPolicy::NEAREST_NEIGHBOR: - *out.ptr() = nearest_interpolation(in.ptr(), xn, yn, stride); - break; - case InterpolationPolicy::BILINEAR: - *out.ptr() = scale_helpers::pixel_bilinear_c1(in.ptr(), stride, xn, yn); - break; - default: - ARM_COMPUTE_ERROR("Interpolation not supported"); - } - } - else - { - // Clamp coordinates - const auto xi = utility::clamp(std::floor(xn), min_x, max_x - 1); - const auto yi = utility::clamp(std::floor(yn), min_y, max_y - 1); - switch(interpolation) - { - case InterpolationPolicy::NEAREST_NEIGHBOR: - *out.ptr() = *(in.ptr() + xi + yi * stride); - break; - case InterpolationPolicy::BILINEAR: - { - const auto xi_1 = utility::clamp(std::floor(xn + 1), min_x, max_x - 1); - const auto yi_1 = utility::clamp(std::floor(yn + 1), min_y, max_y - 1); - - const float dx = xn - std::floor(xn); - const float dy = yn - std::floor(yn); - const float dx1 = 1.0f - dx; - const float dy1 = 1.0f - dy; - - const float a00 = *(in.ptr() + xi + yi * stride); - const float a01 = *(in.ptr() + xi_1 + yi * stride); - const float a10 = *(in.ptr() + xi + yi_1 * stride); - const float a11 = *(in.ptr() + xi_1 + yi_1 * stride); - - *out.ptr() = a00 * (dx1 * dy1) + a01 * (dx * dy1) + a10 * (dx1 * dy) + a11 * (dx * dy); - } - break; - default: - ARM_COMPUTE_ERROR("Interpolation not supported"); - } - } - - x0 += M00; - y0 += M10; - z0 += M20; - }, - in, out); -} - -template class arm_compute::NEWarpAffineKernel; -template class arm_compute::NEWarpAffineKernel; -template class arm_compute::NEWarpPerspectiveKernel; -template class arm_compute::NEWarpPerspectiveKernel; diff --git a/src/core/NEON/kernels/NEWarpKernel.h b/src/core/NEON/kernels/NEWarpKernel.h deleted file mode 100644 index 2c4cb55e3c..0000000000 --- a/src/core/NEON/kernels/NEWarpKernel.h +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEWARPKERNEL_H -#define ARM_COMPUTE_NEWARPKERNEL_H - -#include "arm_compute/core/Types.h" -#include "src/core/NEON/INEKernel.h" - -#include -#include -namespace arm_compute -{ -class ITensor; - -/** Common interface for warp affine and warp perspective */ -class INEWarpKernel : public INEKernel -{ -public: - /** Default constructor */ - INEWarpKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - INEWarpKernel(const INEWarpKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - INEWarpKernel &operator=(const INEWarpKernel &) = delete; - /** Allow instances of this class to be moved */ - INEWarpKernel(INEWarpKernel &&) = default; - /** Allow instances of this class to be moved */ - INEWarpKernel &operator=(INEWarpKernel &&) = default; - /** Default destructor */ - ~INEWarpKernel() = default; - /** Initialise the kernel's input, output and border mode. - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] output Destination tensor. Data type supported: U8. - * @param[in] matrix The perspective or affine matrix to use. Must be 2x3 for affine and 3x3 for perspective of type float. - * The matrix argument requires 9 values, for the affine case the last 3 values are ignored. - * @param[in] border_mode Strategy to use for borders - * @param[in] constant_border_value Constant value used for filling the border. - */ - virtual void configure(const ITensor *input, ITensor *output, const std::array &matrix, BorderMode border_mode, uint8_t constant_border_value); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - - // Inherited methods overridden: - BorderSize border_size() const override; - -protected: - /** function to perform warp affine or warp perspective on the given window when border mode == UNDEFINED - * - * @param[in] window Region on which to execute the kernel - */ - virtual void warp_undefined(const Window &window) = 0; - /** function to perform warp affine or warp perspective on the given window when border mode == CONSTANT - * - * @param[in] window Region on which to execute the kernel - */ - virtual void warp_constant(const Window &window) = 0; - /** function to perform warp affine or warp perspective on the given window when border mode == REPLICATE - * - * @param[in] window Region on which to execute the kernel - */ - virtual void warp_replicate(const Window &window) = 0; - /** Common signature for all the specialised warp functions - * - * @param[in] window Region on which to execute the kernel. - */ - void (INEWarpKernel::*_func)(const Window &window); - - const ITensor *_input; /**< Input Tensor */ - ITensor *_output; /**< Output Tensor */ - uint8_t _constant_border_value; /**< Constant value used for filling the border. This value is used for those pixels out of the ROI when the border mode is CONSTANT */ - std::array _matrix; /**< The affine or perspective matrix. Must be 2x3 for warp affine or 3x3 for warp perspective of type float. */ -}; - -/** Template interface for the kernel to compute warp affine - * - */ -template -class NEWarpAffineKernel : public INEWarpKernel -{ -private: - const char *name() const override - { - return "NEWarpAffineKernel"; - } - // Inherited methods overridden: - void warp_undefined(const Window &window) override; - void warp_constant(const Window &window) override; - void warp_replicate(const Window &window) override; -}; - -/** Template interface for the kernel to compute warp perspective - * - */ -template -class NEWarpPerspectiveKernel : public INEWarpKernel -{ -private: - const char *name() const override - { - return "NEWarpPerspectiveKernel"; - } - // Inherited methods overridden: - void warp_undefined(const Window &window) override; - void warp_constant(const Window &window) override; - void warp_replicate(const Window &window) override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEWARPKERNEL_H */ diff --git a/src/runtime/NEON/functions/NEAbsoluteDifference.cpp b/src/runtime/NEON/functions/NEAbsoluteDifference.cpp deleted file mode 100644 index 1c37af980e..0000000000 --- a/src/runtime/NEON/functions/NEAbsoluteDifference.cpp +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h" - -#include "src/core/NEON/kernels/NEAbsoluteDifferenceKernel.h" - -#include - -namespace arm_compute -{ -NEAbsoluteDifference::~NEAbsoluteDifference() = default; - -void NEAbsoluteDifference::configure(const ITensor *input1, const ITensor *input2, ITensor *output) -{ - auto k = std::make_unique(); - k->configure(input1, input2, output); - _kernel = std::move(k); -} -} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEAccumulate.cpp b/src/runtime/NEON/functions/NEAccumulate.cpp deleted file mode 100644 index b81ec24a39..0000000000 --- a/src/runtime/NEON/functions/NEAccumulate.cpp +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NEAccumulate.h" - -#include "src/core/NEON/kernels/NEAccumulateKernel.h" - -#include - -namespace arm_compute -{ -NEAccumulate::~NEAccumulate() = default; - -void NEAccumulate::configure(const ITensor *input, ITensor *output) -{ - auto k = std::make_unique(); - k->configure(input, output); - _kernel = std::move(k); -} - -NEAccumulateWeighted::~NEAccumulateWeighted() = default; - -void NEAccumulateWeighted::configure(const ITensor *input, float alpha, ITensor *output, bool use_fp16) -{ - if(use_fp16) - { - auto k = std::make_unique(); - k->configure(input, alpha, output); - _kernel = std::move(k); - } - else - { - auto k = std::make_unique(); - k->configure(input, alpha, output); - _kernel = std::move(k); - } -} - -NEAccumulateSquared::~NEAccumulateSquared() = default; - -void NEAccumulateSquared::configure(const ITensor *input, uint32_t shift, ITensor *output) -{ - auto k = std::make_unique(); - k->configure(input, shift, output); - _kernel = std::move(k); -} -} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEBox3x3.cpp b/src/runtime/NEON/functions/NEBox3x3.cpp deleted file mode 100644 index ee40e2c475..0000000000 --- a/src/runtime/NEON/functions/NEBox3x3.cpp +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NEBox3x3.h" - -#include "arm_compute/core/PixelValue.h" -#include "src/core/NEON/kernels/NEBox3x3Kernel.h" -#include "src/core/NEON/kernels/NEFillBorderKernel.h" - -#include - -namespace arm_compute -{ -void NEBox3x3::configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value, bool use_fp16) -{ - if(use_fp16) - { - auto k = std::make_unique(); - k->configure(input, output, border_mode == BorderMode::UNDEFINED); - _kernel = std::move(k); - } - else - { - auto k = std::make_unique(); - k->configure(input, output, border_mode == BorderMode::UNDEFINED); - _kernel = std::move(k); - } - auto b = std::make_unique(); - b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); - _border_handler = std::move(b); -} -} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NECannyEdge.cpp b/src/runtime/NEON/functions/NECannyEdge.cpp deleted file mode 100644 index 52bc81e001..0000000000 --- a/src/runtime/NEON/functions/NECannyEdge.cpp +++ /dev/null @@ -1,196 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NECannyEdge.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" -#include "arm_compute/runtime/NEON/functions/NESobel3x3.h" -#include "arm_compute/runtime/NEON/functions/NESobel5x5.h" -#include "arm_compute/runtime/NEON/functions/NESobel7x7.h" -#include "arm_compute/runtime/TensorAllocator.h" -#include "src/core/NEON/kernels/NECannyEdgeKernel.h" -#include "src/core/NEON/kernels/NEFillBorderKernel.h" -#include "src/core/NEON/kernels/NESobel5x5Kernel.h" -#include "src/core/NEON/kernels/NESobel7x7Kernel.h" - -#include -#include -#include - -namespace arm_compute -{ -NECannyEdge::~NECannyEdge() = default; - -NECannyEdge::NECannyEdge(std::shared_ptr memory_manager) // NOLINT - : _memory_group(std::move(memory_manager)), - _sobel(), - _gradient(), - _non_max_suppr(), - _edge_trace(), - _border_mag_gradient(), - _border_edge_trace(), - _gx(), - _gy(), - _magnitude(), - _phase(), - _nonmax(), - _output(nullptr) -{ -} - -void NECannyEdge::configure(ITensor *input, ITensor *output, int32_t upper_thr, int32_t lower_thr, int32_t gradient_size, int32_t norm_type, BorderMode border_mode, uint8_t constant_border_value) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON((1 != norm_type) && (2 != norm_type)); - ARM_COMPUTE_ERROR_ON((gradient_size != 3) && (gradient_size != 5) && (gradient_size != 7)); - ARM_COMPUTE_ERROR_ON((lower_thr < 0) || (lower_thr >= upper_thr)); - - _output = output; - - const TensorShape &shape = input->info()->tensor_shape(); - TensorInfo gradient_info; - TensorInfo magnitude_info; - - // Initialize images - if(gradient_size < 7) - { - gradient_info.init(shape, Format::S16); - magnitude_info.init(shape, Format::U16); - } - else - { - gradient_info.init(shape, Format::S32); - magnitude_info.init(shape, Format::U32); - } - - _gx.allocator()->init(gradient_info); - _gy.allocator()->init(gradient_info); - _magnitude.allocator()->init(magnitude_info); - - TensorInfo info(shape, Format::U8); - _phase.allocator()->init(info); - _nonmax.allocator()->init(info); - - // Manage intermediate buffers - _memory_group.manage(&_gx); - _memory_group.manage(&_gy); - - // Configure/Init sobelNxN - if(gradient_size == 3) - { - auto k = std::make_unique(); - k->configure(input, &_gx, &_gy, border_mode, constant_border_value); - _sobel = std::move(k); - } - else if(gradient_size == 5) - { - auto k = std::make_unique(); - k->configure(input, &_gx, &_gy, border_mode, constant_border_value); - _sobel = std::move(k); - } - else if(gradient_size == 7) - { - auto k = std::make_unique(); - k->configure(input, &_gx, &_gy, border_mode, constant_border_value); - _sobel = std::move(k); - } - else - { - ARM_COMPUTE_ERROR_VAR("Gradient size %+" PRId32 " not supported\n", gradient_size); - } - - // Manage intermediate buffers - _memory_group.manage(&_magnitude); - _memory_group.manage(&_phase); - - // Configure gradient - auto k = std::make_unique(); - k->configure(&_gx, &_gy, &_magnitude, &_phase, norm_type); - _gradient = std::move(k); - - // Allocate intermediate tensors - _gx.allocator()->allocate(); - _gy.allocator()->allocate(); - - // Manage intermediate buffers - _memory_group.manage(&_nonmax); - - // Configure non-maxima suppression - _non_max_suppr = std::make_unique(); - _non_max_suppr->configure(&_magnitude, &_phase, &_nonmax, upper_thr, lower_thr, border_mode == BorderMode::UNDEFINED); - - // Fill border around magnitude image as non-maxima suppression will access - // it. If border mode is undefined filling the border is a nop. - _border_mag_gradient = std::make_unique(); - _border_mag_gradient->configure(&_magnitude, _non_max_suppr->border_size(), border_mode, constant_border_value); - - // Allocate intermediate tensors - _phase.allocator()->allocate(); - _magnitude.allocator()->allocate(); - - // Configure edge tracing - _edge_trace = std::make_unique(); - _edge_trace->configure(&_nonmax, output); - - // Fill border with "No edge" to stop recursion in edge trace - _border_edge_trace = std::make_unique(); - _border_edge_trace->configure(&_nonmax, _edge_trace->border_size(), BorderMode::CONSTANT, static_cast(0.f)); - - // Allocate intermediate tensors - _nonmax.allocator()->allocate(); -} - -void NECannyEdge::run() -{ - ARM_COMPUTE_ERROR_ON_MSG(_sobel == nullptr, "Unconfigured function"); - - MemoryGroupResourceScope scope_mg(_memory_group); - - // Run sobelNxN - _sobel->run(); - - // Run gradient - NEScheduler::get().schedule(_gradient.get(), Window::DimY); - - // Fill border before non-maxima suppression. Nop for border mode undefined. - NEScheduler::get().schedule(_border_mag_gradient.get(), Window::DimZ); - - // Run non-maxima suppression - NEScheduler::get().schedule(_non_max_suppr.get(), Window::DimY); - - ARM_COMPUTE_ERROR_ON(_output->buffer() == nullptr); - std::fill_n(_output->buffer(), _output->info()->total_size(), 0); - - // Fill border before edge trace - NEScheduler::get().schedule(_border_edge_trace.get(), Window::DimZ); - - // Run edge tracing - NEScheduler::get().schedule(_edge_trace.get(), Window::DimY); -} -} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEChannelCombine.cpp b/src/runtime/NEON/functions/NEChannelCombine.cpp deleted file mode 100644 index b566153bf4..0000000000 --- a/src/runtime/NEON/functions/NEChannelCombine.cpp +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NEChannelCombine.h" - -#include "src/core/NEON/kernels/NEChannelCombineKernel.h" - -#include - -using namespace arm_compute; - -void NEChannelCombine::configure(const ITensor *plane0, const ITensor *plane1, const ITensor *plane2, const ITensor *plane3, ITensor *output) -{ - auto k = std::make_unique(); - k->configure(plane0, plane1, plane2, plane3, output); - _kernel = std::move(k); -} - -void NEChannelCombine::configure(const IImage *plane0, const IImage *plane1, const IImage *plane2, IMultiImage *output) -{ - auto k = std::make_unique(); - k->configure(plane0, plane1, plane2, output); - _kernel = std::move(k); -} diff --git a/src/runtime/NEON/functions/NEChannelExtract.cpp b/src/runtime/NEON/functions/NEChannelExtract.cpp deleted file mode 100644 index a43dc28896..0000000000 --- a/src/runtime/NEON/functions/NEChannelExtract.cpp +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NEChannelExtract.h" - -#include "src/core/NEON/kernels/NEChannelExtractKernel.h" - -#include - -using namespace arm_compute; - -void NEChannelExtract::configure(const ITensor *input, Channel channel, ITensor *output) -{ - auto k = std::make_unique(); - k->configure(input, channel, output); - _kernel = std::move(k); -} - -void NEChannelExtract::configure(const IMultiImage *input, Channel channel, IImage *output) -{ - auto k = std::make_unique(); - k->configure(input, channel, output); - _kernel = std::move(k); -} diff --git a/src/runtime/NEON/functions/NEColorConvert.cpp b/src/runtime/NEON/functions/NEColorConvert.cpp deleted file mode 100644 index c7c9cdd923..0000000000 --- a/src/runtime/NEON/functions/NEColorConvert.cpp +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NEColorConvert.h" - -#include "src/core/NEON/kernels/NEColorConvertKernel.h" - -#include - -using namespace arm_compute; - -void NEColorConvert::configure(const ITensor *input, ITensor *output) -{ - auto k = std::make_unique(); - k->configure(input, output); - _kernel = std::move(k); -} - -void NEColorConvert::configure(const IMultiImage *input, IImage *output) -{ - auto k = std::make_unique(); - k->configure(input, output); - _kernel = std::move(k); -} - -void NEColorConvert::configure(const IImage *input, IMultiImage *output) -{ - auto k = std::make_unique(); - k->configure(input, output); - _kernel = std::move(k); -} - -void NEColorConvert::configure(const IMultiImage *input, IMultiImage *output) -{ - auto k = std::make_unique(); - k->configure(input, output); - _kernel = std::move(k); -} diff --git a/src/runtime/NEON/functions/NEDerivative.cpp b/src/runtime/NEON/functions/NEDerivative.cpp deleted file mode 100644 index 8ef42123db..0000000000 --- a/src/runtime/NEON/functions/NEDerivative.cpp +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NEDerivative.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/PixelValue.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" -#include "src/core/NEON/kernels/NEDerivativeKernel.h" -#include "src/core/NEON/kernels/NEFillBorderKernel.h" - -namespace arm_compute -{ -NEDerivative::~NEDerivative() = default; - -NEDerivative::NEDerivative() - : _kernel(), _border_handler() -{ -} - -void NEDerivative::configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON((output_x == nullptr) && (output_y == nullptr)); - - _kernel = std::make_unique(); - _border_handler = std::make_unique(); - - _kernel->configure(input, output_x, output_y, border_mode == BorderMode::UNDEFINED); - _border_handler->configure(input, BorderSize(1), border_mode, PixelValue(constant_border_value)); -} - -void NEDerivative::run() -{ - NEScheduler::get().schedule(_border_handler.get(), Window::DimZ); - NEScheduler::get().schedule(_kernel.get(), Window::DimY); -} -} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEDilate.cpp b/src/runtime/NEON/functions/NEDilate.cpp deleted file mode 100644 index 56523abd8a..0000000000 --- a/src/runtime/NEON/functions/NEDilate.cpp +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NEDilate.h" - -#include "arm_compute/core/PixelValue.h" -#include "src/core/NEON/kernels/NEDilateKernel.h" -#include "src/core/NEON/kernels/NEFillBorderKernel.h" - -#include - -using namespace arm_compute; - -void NEDilate::configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value) -{ - auto k = std::make_unique(); - k->configure(input, output, border_mode == BorderMode::UNDEFINED); - _kernel = std::move(k); - - auto b = std::make_unique(); - b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); - _border_handler = std::move(b); -} diff --git a/src/runtime/NEON/functions/NEEqualizeHistogram.cpp b/src/runtime/NEON/functions/NEEqualizeHistogram.cpp deleted file mode 100644 index 0b83b7dac7..0000000000 --- a/src/runtime/NEON/functions/NEEqualizeHistogram.cpp +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h" - -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" -#include "src/core/NEON/kernels/NECumulativeDistributionKernel.h" -#include "src/core/NEON/kernels/NEHistogramKernel.h" -#include "src/core/NEON/kernels/NEHistogramKernel.h" -#include "src/core/NEON/kernels/NETableLookupKernel.h" - -namespace arm_compute -{ -NEEqualizeHistogram::~NEEqualizeHistogram() = default; - -NEEqualizeHistogram::NEEqualizeHistogram() - : _histogram_kernel(), _cd_histogram_kernel(), _map_histogram_kernel(), _hist(nr_bins, 0, max_range), _cum_dist(nr_bins, 0, max_range), _cd_lut(nr_bins, DataType::U8) -{ -} - -void NEEqualizeHistogram::configure(const IImage *input, IImage *output) -{ - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input); - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(output); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); - - _histogram_kernel = std::make_unique(); - _cd_histogram_kernel = std::make_unique(); - _map_histogram_kernel = std::make_unique(); - - // Configure kernels - _histogram_kernel->configure(input, &_hist); - _cd_histogram_kernel->configure(input, &_hist, &_cum_dist, &_cd_lut); - _map_histogram_kernel->configure(input, &_cd_lut, output); -} - -void NEEqualizeHistogram::run() -{ - // Calculate histogram of input. - NEScheduler::get().schedule(_histogram_kernel.get(), Window::DimY); - - // Calculate cumulative distribution of histogram and create LUT. - NEScheduler::get().schedule(_cd_histogram_kernel.get(), Window::DimY); - - // Map input to output using created LUT. - NEScheduler::get().schedule(_map_histogram_kernel.get(), Window::DimY); -} -} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEErode.cpp b/src/runtime/NEON/functions/NEErode.cpp deleted file mode 100644 index 83e266140a..0000000000 --- a/src/runtime/NEON/functions/NEErode.cpp +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NEErode.h" - -#include "arm_compute/core/PixelValue.h" -#include "src/core/NEON/kernels/NEErodeKernel.h" -#include "src/core/NEON/kernels/NEFillBorderKernel.h" - -#include - -namespace arm_compute -{ -void NEErode::configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value) -{ - auto k = std::make_unique(); - k->configure(input, output, border_mode == BorderMode::UNDEFINED); - _kernel = std::move(k); - - auto b = std::make_unique(); - b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); - _border_handler = std::move(b); -} -} // namespace arm_compute \ No newline at end of file diff --git a/src/runtime/NEON/functions/NEFastCorners.cpp b/src/runtime/NEON/functions/NEFastCorners.cpp deleted file mode 100644 index 5164d80947..0000000000 --- a/src/runtime/NEON/functions/NEFastCorners.cpp +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NEFastCorners.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/PixelValue.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/runtime/Array.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" -#include "arm_compute/runtime/TensorAllocator.h" -#include "src/core/NEON/kernels/NEFastCornersKernel.h" -#include "src/core/NEON/kernels/NEFillArrayKernel.h" -#include "src/core/NEON/kernels/NEFillBorderKernel.h" -#include "src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h" - -namespace arm_compute -{ -NEFastCorners::~NEFastCorners() = default; - -NEFastCorners::NEFastCorners(std::shared_ptr memory_manager) - : _memory_group(std::move(memory_manager)), - _fast_corners_kernel(), - _border_handler(), - _nonmax_kernel(), - _fill_kernel(), - _output(), - _suppressed(), - _non_max(false) -{ -} - -void NEFastCorners::configure(IImage *input, float threshold, bool nonmax_suppression, KeyPointArray *corners, - BorderMode border_mode, uint8_t constant_border_value) -{ - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input); - ARM_COMPUTE_ERROR_ON(BorderMode::UNDEFINED != border_mode); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON(nullptr == corners); - ARM_COMPUTE_ERROR_ON(threshold < 1 && threshold > 255); - - _non_max = nonmax_suppression; - - TensorInfo tensor_info(input->info()->tensor_shape(), Format::U8); - _output.allocator()->init(tensor_info); - _memory_group.manage(&_output); - - _fast_corners_kernel = std::make_unique(); - _border_handler = std::make_unique(); - _fill_kernel = std::make_unique(); - // If border is UNDEFINED _fast_corners_kernel will operate in xwindow (3, - // width - 3) and ywindow (3, height -3) so the output image will leave the - // pixels on the borders unchanged. This is reflected in the valid region - // of the output. The non maxima suppression is only run on the valid - // pixels. - _fast_corners_kernel->configure(input, &_output, threshold, nonmax_suppression, BorderMode::UNDEFINED == border_mode); - _border_handler->configure(input, _fast_corners_kernel->border_size(), border_mode, constant_border_value); - - if(!_non_max) - { - _fill_kernel->configure(&_output, 1 /* we keep all texels >0 */, corners); - } - else - { - _suppressed.allocator()->init(tensor_info); - _memory_group.manage(&_suppressed); - _nonmax_kernel = std::make_unique(); - _nonmax_kernel->configure(&_output, &_suppressed, BorderMode::UNDEFINED == border_mode); - _fill_kernel->configure(&_suppressed, 1 /* we keep all texels >0 */, corners); - - // Allocate intermediate tensors - _suppressed.allocator()->allocate(); - } - - // Allocate intermediate tensors - _output.allocator()->allocate(); -} - -void NEFastCorners::run() -{ - NEScheduler::get().schedule(_border_handler.get(), Window::DimZ); - - MemoryGroupResourceScope scope_mg(_memory_group); - - NEScheduler::get().schedule(_fast_corners_kernel.get(), Window::DimY); - - if(_non_max) - { - NEScheduler::get().schedule(_nonmax_kernel.get(), Window::DimY); - } - - NEScheduler::get().schedule(_fill_kernel.get(), Window::DimY); -} -} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEGaussian3x3.cpp b/src/runtime/NEON/functions/NEGaussian3x3.cpp deleted file mode 100644 index 93e813c052..0000000000 --- a/src/runtime/NEON/functions/NEGaussian3x3.cpp +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NEGaussian3x3.h" - -#include "arm_compute/core/PixelValue.h" -#include "src/core/NEON/kernels/NEFillBorderKernel.h" -#include "src/core/NEON/kernels/NEGaussian3x3Kernel.h" - -#include - -namespace arm_compute -{ -void NEGaussian3x3::configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value) -{ - auto k = std::make_unique(); - k->configure(input, output, border_mode == BorderMode::UNDEFINED); - _kernel = std::move(k); - - auto b = std::make_unique(); - b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); - _border_handler = std::move(b); -} -} // namespace arm_compute \ No newline at end of file diff --git a/src/runtime/NEON/functions/NEGaussian5x5.cpp b/src/runtime/NEON/functions/NEGaussian5x5.cpp deleted file mode 100644 index ed7e83b937..0000000000 --- a/src/runtime/NEON/functions/NEGaussian5x5.cpp +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h" - -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/PixelValue.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" -#include "arm_compute/runtime/TensorAllocator.h" -#include "src/core/NEON/kernels/NEFillBorderKernel.h" -#include "src/core/NEON/kernels/NEGaussian5x5Kernel.h" - -namespace arm_compute -{ -NEGaussian5x5::~NEGaussian5x5() = default; - -NEGaussian5x5::NEGaussian5x5(std::shared_ptr memory_manager) - : _memory_group(std::move(memory_manager)), _kernel_hor(), _kernel_vert(), _tmp(), _border_handler() -{ -} - -void NEGaussian5x5::configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value) -{ - // Init temporary buffer - TensorInfo tensor_info(input->info()->tensor_shape(), 1, DataType::S16); - _tmp.allocator()->init(tensor_info); - - // Manage intermediate buffers - _memory_group.manage(&_tmp); - - _kernel_hor = std::make_unique(); - _kernel_vert = std::make_unique(); - _border_handler = std::make_unique(); - - // Create and configure kernels for the two passes - _kernel_hor->configure(input, &_tmp, border_mode == BorderMode::UNDEFINED); - _kernel_vert->configure(&_tmp, output, border_mode == BorderMode::UNDEFINED); - - _tmp.allocator()->allocate(); - - _border_handler->configure(input, _kernel_hor->border_size(), border_mode, PixelValue(constant_border_value)); -} - -void NEGaussian5x5::run() -{ - NEScheduler::get().schedule(_border_handler.get(), Window::DimZ); - - MemoryGroupResourceScope scope_mg(_memory_group); - - NEScheduler::get().schedule(_kernel_hor.get(), Window::DimY); - NEScheduler::get().schedule(_kernel_vert.get(), Window::DimY); -} -} // namespace arm_compute \ No newline at end of file diff --git a/src/runtime/NEON/functions/NEGaussianPyramid.cpp b/src/runtime/NEON/functions/NEGaussianPyramid.cpp deleted file mode 100644 index c9a36fc466..0000000000 --- a/src/runtime/NEON/functions/NEGaussianPyramid.cpp +++ /dev/null @@ -1,202 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NEGaussianPyramid.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/PixelValue.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" -#include "arm_compute/runtime/Pyramid.h" -#include "arm_compute/runtime/Tensor.h" -#include "arm_compute/runtime/TensorAllocator.h" -#include "src/core/NEON/kernels/NEFillBorderKernel.h" -#include "src/core/NEON/kernels/NEGaussian5x5Kernel.h" -#include "src/core/NEON/kernels/NEGaussianPyramidKernel.h" -#include "src/core/NEON/kernels/NEScaleKernel.h" - -#include - -using namespace arm_compute; - -NEGaussianPyramid::NEGaussianPyramid() - : _input(nullptr), _pyramid(nullptr), _tmp() -{ -} - -NEGaussianPyramidHalf::~NEGaussianPyramidHalf() = default; - -NEGaussianPyramidHalf::NEGaussianPyramidHalf() // NOLINT - : _horizontal_border_handler(), - _vertical_border_handler(), - _horizontal_reduction(), - _vertical_reduction() -{ -} - -void NEGaussianPyramidHalf::configure(const ITensor *input, IPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON(nullptr == pyramid); - ARM_COMPUTE_ERROR_ON(input->info()->num_dimensions() != pyramid->get_pyramid_level(0)->info()->num_dimensions()); - ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != pyramid->info()->width()); - ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) != pyramid->info()->height()); - ARM_COMPUTE_ERROR_ON(SCALE_PYRAMID_HALF != pyramid->info()->scale()); - - // Constant value to use for vertical fill border when the border mode is CONSTANT - const uint16_t pixel_value_u16 = static_cast(constant_border_value) * 2 + static_cast(constant_border_value) * 8 + static_cast(constant_border_value) * 6; - - /* Get number of pyramid levels */ - const size_t num_levels = pyramid->info()->num_levels(); - const size_t num_stages = num_levels - 1; - - _input = input; - _pyramid = pyramid; - - if(num_levels > 1) - { - // Apply half scale to the X dimension of the tensor shape - TensorShape tensor_shape = pyramid->info()->tensor_shape(); - tensor_shape.set(0, (pyramid->info()->width() + 1) * SCALE_PYRAMID_HALF); - - PyramidInfo pyramid_info(num_levels - 1, SCALE_PYRAMID_HALF, tensor_shape, Format::S16); - _tmp.init(pyramid_info); - - _horizontal_reduction.clear(); - _vertical_reduction.clear(); - _horizontal_border_handler.clear(); - _vertical_border_handler.clear(); - - _horizontal_reduction.resize(num_stages); - _vertical_reduction.resize(num_stages); - _horizontal_border_handler.resize(num_stages); - _vertical_border_handler.resize(num_stages); - - for(size_t i = 0; i < num_stages; ++i) - { - /* Configure horizontal kernel */ - _horizontal_reduction[i] = std::make_unique(); - _horizontal_reduction[i]->configure(_pyramid->get_pyramid_level(i), _tmp.get_pyramid_level(i)); - - /* Configure vertical kernel */ - _vertical_reduction[i] = std::make_unique(); - _vertical_reduction[i]->configure(_tmp.get_pyramid_level(i), _pyramid->get_pyramid_level(i + 1)); - - /* Configure border */ - _horizontal_border_handler[i] = std::make_unique(); - _horizontal_border_handler[i]->configure(_pyramid->get_pyramid_level(i), _horizontal_reduction[i]->border_size(), border_mode, PixelValue(constant_border_value)); - - /* Configure border */ - _vertical_border_handler[i] = std::make_unique(); - _vertical_border_handler[i]->configure(_tmp.get_pyramid_level(i), _vertical_reduction[i]->border_size(), border_mode, PixelValue(pixel_value_u16)); - } - - _tmp.allocate(); - } -} - -void NEGaussianPyramidHalf::run() -{ - ARM_COMPUTE_ERROR_ON_MSG(_pyramid == nullptr, "Unconfigured function"); - - /* Get number of pyramid levels */ - const unsigned int num_levels = _pyramid->info()->num_levels(); - - /* The first level of the pyramid has the input image */ - _pyramid->get_pyramid_level(0)->copy_from(*_input); - - for(unsigned int i = 0; i < num_levels - 1; ++i) - { - NEScheduler::get().schedule(_horizontal_border_handler[i].get(), Window::DimZ); - NEScheduler::get().schedule(_horizontal_reduction[i].get(), Window::DimY); - NEScheduler::get().schedule(_vertical_border_handler[i].get(), Window::DimZ); - NEScheduler::get().schedule(_vertical_reduction[i].get(), Window::DimY); - } -} - -NEGaussianPyramidOrb::~NEGaussianPyramidOrb() = default; - -NEGaussianPyramidOrb::NEGaussianPyramidOrb() // NOLINT - : _gaus5x5(), - _scale_nearest() -{ -} - -void NEGaussianPyramidOrb::configure(const ITensor *input, IPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON(nullptr == pyramid); - ARM_COMPUTE_ERROR_ON(input->info()->num_dimensions() != pyramid->get_pyramid_level(0)->info()->num_dimensions()); - ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != pyramid->info()->width()); - ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) != pyramid->info()->height()); - ARM_COMPUTE_ERROR_ON(SCALE_PYRAMID_ORB != pyramid->info()->scale()); - - /* Get number of pyramid levels */ - const size_t num_levels = pyramid->info()->num_levels(); - const size_t num_stages = num_levels - 1; - - _input = input; - _pyramid = pyramid; - - _gaus5x5.clear(); - _scale_nearest.clear(); - - _gaus5x5.resize(num_stages); - _scale_nearest.resize(num_stages); - - if(num_levels > 1) - { - PyramidInfo pyramid_info(num_levels - 1, SCALE_PYRAMID_ORB, pyramid->info()->tensor_shape(), Format::U8); - _tmp.init(pyramid_info); - - for(size_t i = 0; i < num_levels - 1; ++i) - { - /* Configure gaussian 5x5 */ - _gaus5x5[i].configure(_pyramid->get_pyramid_level(i), _tmp.get_pyramid_level(i), border_mode, constant_border_value); - - /* Configure scale */ - _scale_nearest[i].configure(_tmp.get_pyramid_level(i), _pyramid->get_pyramid_level(i + 1), ScaleKernelInfo{ InterpolationPolicy::NEAREST_NEIGHBOR, BorderMode::UNDEFINED, PixelValue(), SamplingPolicy::CENTER, false }); - } - - _tmp.allocate(); - } -} - -void NEGaussianPyramidOrb::run() -{ - ARM_COMPUTE_ERROR_ON_MSG(_pyramid == nullptr, "Unconfigured function"); - - /* Get number of pyramid levels */ - const size_t num_levels = _pyramid->info()->num_levels(); - - /* The first level of the pyramid has the input image */ - _pyramid->get_pyramid_level(0)->copy_from(*_input); - - for(unsigned int i = 0; i < num_levels - 1; ++i) - { - _gaus5x5[i].run(); - _scale_nearest[i].run(); - } -} diff --git a/src/runtime/NEON/functions/NEHOGDescriptor.cpp b/src/runtime/NEON/functions/NEHOGDescriptor.cpp deleted file mode 100644 index bb125a1eae..0000000000 --- a/src/runtime/NEON/functions/NEHOGDescriptor.cpp +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NEHOGDescriptor.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/HOGInfo.h" -#include "arm_compute/core/Size2D.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" -#include "src/core/NEON/kernels/NEDerivativeKernel.h" -#include "src/core/NEON/kernels/NEFillBorderKernel.h" -#include "src/core/NEON/kernels/NEHOGDescriptorKernel.h" - -namespace arm_compute -{ -NEHOGDescriptor::~NEHOGDescriptor() = default; - -NEHOGDescriptor::NEHOGDescriptor(std::shared_ptr memory_manager) - : _memory_group(std::move(memory_manager)), _gradient(), _orient_bin(), _block_norm(), _mag(), _phase(), _hog_space() -{ -} - -void NEHOGDescriptor::configure(ITensor *input, ITensor *output, const IHOG *hog, BorderMode border_mode, uint8_t constant_border_value) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON(nullptr == output); - ARM_COMPUTE_ERROR_ON(nullptr == hog); - - const HOGInfo *hog_info = hog->info(); - const size_t width = input->info()->dimension(Window::DimX); - const size_t height = input->info()->dimension(Window::DimY); - const size_t num_bins = hog_info->num_bins(); - - Size2D cell_size = hog_info->cell_size(); - - // Calculate number of cells along the x and y directions for the hog_space - const size_t num_cells_x = width / cell_size.width; - const size_t num_cells_y = height / cell_size.height; - - // TensorShape of the input image - const TensorShape &shape_img = input->info()->tensor_shape(); - - // TensorShape of the hog space - TensorShape shape_hog_space = input->info()->tensor_shape(); - shape_hog_space.set(Window::DimX, num_cells_x); - shape_hog_space.set(Window::DimY, num_cells_y); - - // Allocate memory for magnitude, phase and hog space - TensorInfo info_mag(shape_img, Format::S16); - _mag.allocator()->init(info_mag); - - TensorInfo info_phase(shape_img, Format::U8); - _phase.allocator()->init(info_phase); - - TensorInfo info_space(shape_hog_space, num_bins, DataType::F32); - _hog_space.allocator()->init(info_space); - - // Manage intermediate buffers - _memory_group.manage(&_mag); - _memory_group.manage(&_phase); - - // Initialise gradient kernel - _gradient.configure(input, &_mag, &_phase, hog_info->phase_type(), border_mode, constant_border_value); - - // Manage intermediate buffers - _memory_group.manage(&_hog_space); - - // Initialise orientation binning kernel - _orient_bin = std::make_unique(); - _orient_bin->configure(&_mag, &_phase, &_hog_space, hog->info()); - - // Initialize HOG norm kernel - _block_norm = std::make_unique(); - _block_norm->configure(&_hog_space, output, hog->info()); - - // Allocate intermediate tensors - _mag.allocator()->allocate(); - _phase.allocator()->allocate(); - _hog_space.allocator()->allocate(); -} - -void NEHOGDescriptor::run() -{ - MemoryGroupResourceScope scope_mg(_memory_group); - - // Run gradient - _gradient.run(); - - // Run orientation binning kernel - NEScheduler::get().schedule(_orient_bin.get(), Window::DimY); - - // Run block normalization kernel - NEScheduler::get().schedule(_block_norm.get(), Window::DimY); -} -} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEHOGDetector.cpp b/src/runtime/NEON/functions/NEHOGDetector.cpp deleted file mode 100644 index 3eda1b0ce0..0000000000 --- a/src/runtime/NEON/functions/NEHOGDetector.cpp +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NEHOGDetector.h" - -#include "src/core/NEON/kernels/NEHOGDetectorKernel.h" - -namespace arm_compute -{ -NEHOGDetector::~NEHOGDetector() = default; - -void NEHOGDetector::configure(const ITensor *input, const IHOG *hog, IDetectionWindowArray *detection_windows, const Size2D &detection_window_stride, float threshold, size_t idx_class) -{ - auto k = std::make_unique(); - k->configure(input, hog, detection_windows, detection_window_stride, threshold, idx_class); - _kernel = std::move(k); -} -} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEHOGGradient.cpp b/src/runtime/NEON/functions/NEHOGGradient.cpp deleted file mode 100644 index f5a47735a9..0000000000 --- a/src/runtime/NEON/functions/NEHOGGradient.cpp +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NEHOGGradient.h" - -#include "arm_compute/core/Validate.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" -#include "src/core/NEON/kernels/NEDerivativeKernel.h" -#include "src/core/NEON/kernels/NEFillBorderKernel.h" -#include "src/core/NEON/kernels/NEMagnitudePhaseKernel.h" - -namespace arm_compute -{ -NEHOGGradient::~NEHOGGradient() = default; - -NEHOGGradient::NEHOGGradient(std::shared_ptr memory_manager) // NOLINT - : _memory_group(std::move(memory_manager)), - _derivative(), - _mag_phase(nullptr), - _gx(), - _gy() -{ -} - -void NEHOGGradient::configure(ITensor *input, ITensor *output_magnitude, ITensor *output_phase, PhaseType phase_type, BorderMode border_mode, uint8_t constant_border_value) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_magnitude, 1, DataType::S16); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_phase, 1, DataType::U8); - - const TensorShape &shape_img = input->info()->tensor_shape(); - - // Allocate image memory - TensorInfo info(shape_img, Format::S16); - _gx.allocator()->init(info); - _gy.allocator()->init(info); - - // Manage intermediate buffers - _memory_group.manage(&_gx); - _memory_group.manage(&_gy); - - // Initialise derivate kernel - _derivative.configure(input, &_gx, &_gy, border_mode, constant_border_value); - - // Initialise magnitude/phase kernel - if(PhaseType::UNSIGNED == phase_type) - { - auto k = std::make_unique>(); - k->configure(&_gx, &_gy, output_magnitude, output_phase); - _mag_phase = std::move(k); - } - else - { - auto k = std::make_unique>(); - k->configure(&_gx, &_gy, output_magnitude, output_phase); - _mag_phase = std::move(k); - } - - // Allocate intermediate tensors - _gx.allocator()->allocate(); - _gy.allocator()->allocate(); -} - -void NEHOGGradient::run() -{ - MemoryGroupResourceScope scope_mg(_memory_group); - - // Run derivative - _derivative.run(); - - // Run magnitude/phase kernel - NEScheduler::get().schedule(_mag_phase.get(), Window::DimY); -} -} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEHOGMultiDetection.cpp b/src/runtime/NEON/functions/NEHOGMultiDetection.cpp deleted file mode 100644 index 3e41faad43..0000000000 --- a/src/runtime/NEON/functions/NEHOGMultiDetection.cpp +++ /dev/null @@ -1,270 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" -#include "arm_compute/runtime/Tensor.h" -#include "src/core/NEON/kernels/NEDerivativeKernel.h" -#include "src/core/NEON/kernels/NEFillBorderKernel.h" -#include "src/core/NEON/kernels/NEHOGDescriptorKernel.h" - -namespace arm_compute -{ -NEHOGMultiDetection::~NEHOGMultiDetection() = default; - -NEHOGMultiDetection::NEHOGMultiDetection(std::shared_ptr memory_manager) // NOLINT - : _memory_group(std::move(memory_manager)), - _gradient_kernel(), - _orient_bin_kernel(), - _block_norm_kernel(), - _hog_detect_kernel(), - _non_maxima_kernel(), - _hog_space(), - _hog_norm_space(), - _detection_windows(), - _mag(), - _phase(), - _non_maxima_suppression(false), - _num_orient_bin_kernel(0), - _num_block_norm_kernel(0), - _num_hog_detect_kernel(0) -{ -} - -void NEHOGMultiDetection::configure(ITensor *input, const IMultiHOG *multi_hog, IDetectionWindowArray *detection_windows, const ISize2DArray *detection_window_strides, BorderMode border_mode, - uint8_t constant_border_value, float threshold, bool non_maxima_suppression, float min_distance) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_INVALID_MULTI_HOG(multi_hog); - ARM_COMPUTE_ERROR_ON(nullptr == detection_windows); - ARM_COMPUTE_ERROR_ON(detection_window_strides->num_values() != multi_hog->num_models()); - - const size_t width = input->info()->dimension(Window::DimX); - const size_t height = input->info()->dimension(Window::DimY); - const TensorShape &shape_img = input->info()->tensor_shape(); - const size_t num_models = multi_hog->num_models(); - PhaseType phase_type = multi_hog->model(0)->info()->phase_type(); - - size_t prev_num_bins = multi_hog->model(0)->info()->num_bins(); - Size2D prev_cell_size = multi_hog->model(0)->info()->cell_size(); - Size2D prev_block_size = multi_hog->model(0)->info()->block_size(); - Size2D prev_block_stride = multi_hog->model(0)->info()->block_stride(); - - /* Check if NEHOGOrientationBinningKernel and NEHOGBlockNormalizationKernel kernels can be skipped for a specific HOG data-object - * - * 1) NEHOGOrientationBinningKernel and NEHOGBlockNormalizationKernel are skipped if the cell size and the number of bins don't change. - * Since "multi_hog" is sorted,it is enough to check the HOG descriptors at level "ith" and level "(i-1)th - * 2) NEHOGBlockNormalizationKernel is skipped if the cell size, the number of bins and block size do not change. - * Since "multi_hog" is sorted,it is enough to check the HOG descriptors at level "ith" and level "(i-1)th - * - * @note Since the orientation binning and block normalization kernels can be skipped, we need to keep track of the input to process for each kernel - * with "input_orient_bin", "input_hog_detect" and "input_block_norm" - */ - std::vector input_orient_bin; - std::vector input_hog_detect; - std::vector> input_block_norm; - - input_orient_bin.push_back(0); - input_hog_detect.push_back(0); - input_block_norm.emplace_back(0, 0); - - for(size_t i = 1; i < num_models; ++i) - { - size_t cur_num_bins = multi_hog->model(i)->info()->num_bins(); - Size2D cur_cell_size = multi_hog->model(i)->info()->cell_size(); - Size2D cur_block_size = multi_hog->model(i)->info()->block_size(); - Size2D cur_block_stride = multi_hog->model(i)->info()->block_stride(); - - if((cur_num_bins != prev_num_bins) || (cur_cell_size.width != prev_cell_size.width) || (cur_cell_size.height != prev_cell_size.height)) - { - prev_num_bins = cur_num_bins; - prev_cell_size = cur_cell_size; - prev_block_size = cur_block_size; - prev_block_stride = cur_block_stride; - - // Compute orientation binning and block normalization kernels. Update input to process - input_orient_bin.push_back(i); - input_block_norm.emplace_back(i, input_orient_bin.size() - 1); - } - else if((cur_block_size.width != prev_block_size.width) || (cur_block_size.height != prev_block_size.height) || (cur_block_stride.width != prev_block_stride.width) - || (cur_block_stride.height != prev_block_stride.height)) - { - prev_block_size = cur_block_size; - prev_block_stride = cur_block_stride; - - // Compute block normalization kernel. Update input to process - input_block_norm.emplace_back(i, input_orient_bin.size() - 1); - } - - // Update input to process for hog detector kernel - input_hog_detect.push_back(input_block_norm.size() - 1); - } - - _detection_windows = detection_windows; - _non_maxima_suppression = non_maxima_suppression; - _num_orient_bin_kernel = input_orient_bin.size(); // Number of NEHOGOrientationBinningKernel kernels to compute - _num_block_norm_kernel = input_block_norm.size(); // Number of NEHOGBlockNormalizationKernel kernels to compute - _num_hog_detect_kernel = input_hog_detect.size(); // Number of NEHOGDetector functions to compute - - _orient_bin_kernel.clear(); - _block_norm_kernel.clear(); - _hog_detect_kernel.clear(); - _hog_space.clear(); - _hog_norm_space.clear(); - - _orient_bin_kernel.resize(_num_orient_bin_kernel); - _block_norm_kernel.resize(_num_block_norm_kernel); - _hog_detect_kernel.resize(_num_hog_detect_kernel); - _hog_space.resize(_num_orient_bin_kernel); - _hog_norm_space.resize(_num_block_norm_kernel); - _non_maxima_kernel = CPPDetectionWindowNonMaximaSuppressionKernel(); - - // Allocate tensors for magnitude and phase - TensorInfo info_mag(shape_img, Format::S16); - _mag.allocator()->init(info_mag); - - TensorInfo info_phase(shape_img, Format::U8); - _phase.allocator()->init(info_phase); - - // Manage intermediate buffers - _memory_group.manage(&_mag); - _memory_group.manage(&_phase); - - // Initialise gradient kernel - _gradient_kernel.configure(input, &_mag, &_phase, phase_type, border_mode, constant_border_value); - - // Configure NETensor for the HOG space and orientation binning kernel - for(size_t i = 0; i < _num_orient_bin_kernel; ++i) - { - const size_t idx_multi_hog = input_orient_bin[i]; - - // Get the corresponding cell size and number of bins - const Size2D &cell = multi_hog->model(idx_multi_hog)->info()->cell_size(); - const size_t num_bins = multi_hog->model(idx_multi_hog)->info()->num_bins(); - - // Calculate number of cells along the x and y directions for the hog_space - const size_t num_cells_x = width / cell.width; - const size_t num_cells_y = height / cell.height; - - // TensorShape of hog space - TensorShape shape_hog_space = input->info()->tensor_shape(); - shape_hog_space.set(Window::DimX, num_cells_x); - shape_hog_space.set(Window::DimY, num_cells_y); - - // Allocate HOG space - TensorInfo info_space(shape_hog_space, num_bins, DataType::F32); - _hog_space[i].allocator()->init(info_space); - - // Manage intermediate buffers - _memory_group.manage(&_hog_space[i]); - - // Initialise orientation binning kernel - _orient_bin_kernel[i].configure(&_mag, &_phase, &_hog_space[i], multi_hog->model(idx_multi_hog)->info()); - } - - // Allocate intermediate tensors - _mag.allocator()->allocate(); - _phase.allocator()->allocate(); - - // Configure NETensor for the normalized HOG space and block normalization kernel - for(size_t i = 0; i < _num_block_norm_kernel; ++i) - { - const size_t idx_multi_hog = input_block_norm[i].first; - const size_t idx_orient_bin = input_block_norm[i].second; - - // Allocate normalized HOG space - TensorInfo tensor_info(*(multi_hog->model(idx_multi_hog)->info()), width, height); - _hog_norm_space[i].allocator()->init(tensor_info); - - // Manage intermediate buffers - _memory_group.manage(&_hog_norm_space[i]); - - // Initialize block normalization kernel - _block_norm_kernel[i].configure(&_hog_space[idx_orient_bin], &_hog_norm_space[i], multi_hog->model(idx_multi_hog)->info()); - } - - // Allocate intermediate tensors - for(size_t i = 0; i < _num_orient_bin_kernel; ++i) - { - _hog_space[i].allocator()->allocate(); - } - - // Configure HOG detector kernel - for(size_t i = 0; i < _num_hog_detect_kernel; ++i) - { - const size_t idx_block_norm = input_hog_detect[i]; - - _hog_detect_kernel[i].configure(&_hog_norm_space[idx_block_norm], multi_hog->model(i), detection_windows, detection_window_strides->at(i), threshold, i); - } - - // Configure non maxima suppression kernel - _non_maxima_kernel.configure(_detection_windows, min_distance); - - // Allocate intermediate tensors - for(size_t i = 0; i < _num_block_norm_kernel; ++i) - { - _hog_norm_space[i].allocator()->allocate(); - } -} - -void NEHOGMultiDetection::run() -{ - ARM_COMPUTE_ERROR_ON_MSG(_detection_windows == nullptr, "Unconfigured function"); - - MemoryGroupResourceScope scope_mg(_memory_group); - - // Reset detection window - _detection_windows->clear(); - - // Run gradient - _gradient_kernel.run(); - - // Run orientation binning kernel - for(auto &kernel : _orient_bin_kernel) - { - NEScheduler::get().schedule(&kernel, Window::DimY); - } - - // Run block normalization kernel - for(auto &kernel : _block_norm_kernel) - { - NEScheduler::get().schedule(&kernel, Window::DimY); - } - - // Run HOG detector kernel - for(auto &kernel : _hog_detect_kernel) - { - kernel.run(); - } - - // Run non-maxima suppression kernel if enabled - if(_non_maxima_suppression) - { - NEScheduler::get().schedule(&_non_maxima_kernel, Window::DimY); - } -} -} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEHarrisCorners.cpp b/src/runtime/NEON/functions/NEHarrisCorners.cpp deleted file mode 100644 index 6b15596f8a..0000000000 --- a/src/runtime/NEON/functions/NEHarrisCorners.cpp +++ /dev/null @@ -1,217 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NEHarrisCorners.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/runtime/Array.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" -#include "arm_compute/runtime/NEON/functions/NESobel3x3.h" -#include "arm_compute/runtime/NEON/functions/NESobel5x5.h" -#include "arm_compute/runtime/NEON/functions/NESobel7x7.h" -#include "arm_compute/runtime/TensorAllocator.h" -#include "src/core/NEON/kernels/NEFillBorderKernel.h" -#include "src/core/NEON/kernels/NEFillBorderKernel.h" -#include "src/core/NEON/kernels/NEHarrisCornersKernel.h" -#include "src/core/NEON/kernels/NESobel5x5Kernel.h" -#include "src/core/NEON/kernels/NESobel7x7Kernel.h" - -#include -#include - -namespace arm_compute -{ -NEHarrisCorners::~NEHarrisCorners() = default; - -NEHarrisCorners::NEHarrisCorners(std::shared_ptr memory_manager) // NOLINT - : _memory_group(std::move(memory_manager)), - _sobel(), - _harris_score(), - _non_max_suppr(), - _candidates(), - _sort_euclidean(), - _border_gx(), - _border_gy(), - _gx(), - _gy(), - _score(), - _nonmax(), - _corners_list(), - _num_corner_candidates(0) -{ -} - -void NEHarrisCorners::configure(IImage *input, float threshold, float min_dist, - float sensitivity, int32_t gradient_size, int32_t block_size, KeyPointArray *corners, - BorderMode border_mode, uint8_t constant_border_value) -{ - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON(!(block_size == 3 || block_size == 5 || block_size == 7)); - - const TensorShape shape = input->info()->tensor_shape(); - TensorInfo tensor_info_gxgy; - - if(gradient_size < 7) - { - tensor_info_gxgy.init(shape, Format::S16); - } - else - { - tensor_info_gxgy.init(shape, Format::S32); - } - - _gx.allocator()->init(tensor_info_gxgy); - _gy.allocator()->init(tensor_info_gxgy); - - // Manage intermediate buffers - _memory_group.manage(&_gx); - _memory_group.manage(&_gy); - - TensorInfo tensor_info_score(shape, Format::F32); - _score.allocator()->init(tensor_info_score); - _nonmax.allocator()->init(tensor_info_score); - - _corners_list.resize(shape.x() * shape.y()); - - // Set/init Sobel kernel accordingly with gradient_size - switch(gradient_size) - { - case 3: - { - auto k = std::make_unique(); - k->configure(input, &_gx, &_gy, border_mode, constant_border_value); - _sobel = std::move(k); - break; - } - case 5: - { - auto k = std::make_unique(); - k->configure(input, &_gx, &_gy, border_mode, constant_border_value); - _sobel = std::move(k); - break; - } - case 7: - { - auto k = std::make_unique(); - k->configure(input, &_gx, &_gy, border_mode, constant_border_value); - _sobel = std::move(k); - break; - } - default: - ARM_COMPUTE_ERROR("Gradient size not implemented"); - } - - // Normalization factor - const float norm_factor = 1.0f / (255.0f * pow(4.0f, gradient_size / 2) * block_size); - - // Manage intermediate buffers - _memory_group.manage(&_score); - - // Set/init Harris Score kernel accordingly with block_size - switch(block_size) - { - case 3: - { - auto k = std::make_unique>(); - k->configure(&_gx, &_gy, &_score, norm_factor, threshold, sensitivity, border_mode == BorderMode::UNDEFINED); - _harris_score = std::move(k); - } - break; - case 5: - { - auto k = std::make_unique>(); - k->configure(&_gx, &_gy, &_score, norm_factor, threshold, sensitivity, border_mode == BorderMode::UNDEFINED); - _harris_score = std::move(k); - } - break; - case 7: - { - auto k = std::make_unique>(); - k->configure(&_gx, &_gy, &_score, norm_factor, threshold, sensitivity, border_mode == BorderMode::UNDEFINED); - _harris_score = std::move(k); - } - default: - break; - } - - // Configure border filling before harris score - _border_gx = std::make_unique(); - _border_gy = std::make_unique(); - _border_gx->configure(&_gx, _harris_score->border_size(), border_mode, constant_border_value); - _border_gy->configure(&_gy, _harris_score->border_size(), border_mode, constant_border_value); - - // Allocate once all the configure methods have been called - _gx.allocator()->allocate(); - _gy.allocator()->allocate(); - - // Manage intermediate buffers - _memory_group.manage(&_nonmax); - - // Init non-maxima suppression function - _non_max_suppr.configure(&_score, &_nonmax, border_mode); - - // Allocate once all the configure methods have been called - _score.allocator()->allocate(); - - // Init corner candidates kernel - _candidates.configure(&_nonmax, _corners_list.data(), &_num_corner_candidates); - - // Allocate once all the configure methods have been called - _nonmax.allocator()->allocate(); - - // Init euclidean distance - _sort_euclidean.configure(_corners_list.data(), corners, &_num_corner_candidates, min_dist); -} - -void NEHarrisCorners::run() -{ - ARM_COMPUTE_ERROR_ON_MSG(_sobel == nullptr, "Unconfigured function"); - - MemoryGroupResourceScope scope_mg(_memory_group); - - // Init to 0 number of corner candidates - _num_corner_candidates = 0; - - // Run Sobel kernel - _sobel->run(); - - // Fill border before harris score kernel - NEScheduler::get().schedule(_border_gx.get(), Window::DimZ); - NEScheduler::get().schedule(_border_gy.get(), Window::DimZ); - - // Run harris score kernel - NEScheduler::get().schedule(_harris_score.get(), Window::DimY); - - // Run non-maxima suppression - _non_max_suppr.run(); - - // Run corner candidate kernel - NEScheduler::get().schedule(&_candidates, Window::DimY); - - // Run sort & euclidean distance - NEScheduler::get().schedule(&_sort_euclidean, Window::DimY); -} -} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEHistogram.cpp b/src/runtime/NEON/functions/NEHistogram.cpp deleted file mode 100644 index 1b093d60e5..0000000000 --- a/src/runtime/NEON/functions/NEHistogram.cpp +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NEHistogram.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/IDistribution1D.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" -#include "src/core/NEON/kernels/NEHistogramKernel.h" - -namespace arm_compute -{ -NEHistogram::~NEHistogram() = default; - -NEHistogram::NEHistogram() - : _histogram_kernel(), _local_hist(), _window_lut(window_lut_default_size), _local_hist_size(0) -{ -} - -void NEHistogram::configure(const IImage *input, IDistribution1D *output) -{ - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input); - ARM_COMPUTE_ERROR_ON(nullptr == output); - - // Allocate space for threads local histograms - _local_hist_size = output->num_bins() * NEScheduler::get().num_threads(); - _local_hist.resize(_local_hist_size); - - // Configure kernel - _histogram_kernel = std::make_unique(); - _histogram_kernel->configure(input, output, _local_hist.data(), _window_lut.data()); -} - -void NEHistogram::run() -{ - // Calculate histogram of input. - NEScheduler::get().schedule(_histogram_kernel.get(), Window::DimY); -} -} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEIntegralImage.cpp b/src/runtime/NEON/functions/NEIntegralImage.cpp deleted file mode 100644 index 38f04247f6..0000000000 --- a/src/runtime/NEON/functions/NEIntegralImage.cpp +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NEIntegralImage.h" - -#include "arm_compute/core/Types.h" -#include "src/core/NEON/kernels/NEFillBorderKernel.h" -#include "src/core/NEON/kernels/NEIntegralImageKernel.h" - -#include - -namespace arm_compute -{ -NEIntegralImage::~NEIntegralImage() = default; - -void NEIntegralImage::configure(const ITensor *input, ITensor *output) -{ - auto k = std::make_unique(); - k->configure(input, output); - _kernel = std::move(k); - - auto b = std::make_unique(); - b->configure(output, _kernel->border_size(), BorderMode::CONSTANT, PixelValue()); - _border_handler = std::move(b); -} -} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NELaplacianPyramid.cpp b/src/runtime/NEON/functions/NELaplacianPyramid.cpp deleted file mode 100644 index a2651dbf36..0000000000 --- a/src/runtime/NEON/functions/NELaplacianPyramid.cpp +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NELaplacianPyramid.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/IPyramid.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h" -#include "arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h" -#include "arm_compute/runtime/NEON/functions/NEGaussianPyramid.h" -#include "arm_compute/runtime/Tensor.h" -#include "src/core/NEON/kernels/NEFillBorderKernel.h" -#include "src/core/NEON/kernels/NEGaussian5x5Kernel.h" -#include "src/core/NEON/kernels/NEGaussianPyramidKernel.h" - -namespace arm_compute -{ -NELaplacianPyramid::~NELaplacianPyramid() = default; - -NELaplacianPyramid::NELaplacianPyramid() // NOLINT - : _num_levels(0), - _gaussian_pyr_function(), - _convf(), - _subf(), - _gauss_pyr(), - _conv_pyr(), - _depth_function() -{ -} - -void NELaplacianPyramid::run() -{ - ARM_COMPUTE_ERROR_ON_MSG(0 == _num_levels, "Unconfigured function"); - - // Compute Gaussian Pyramid - _gaussian_pyr_function.run(); - - for(unsigned int i = 0; i < _num_levels; ++i) - { - // Apply Gaussian filter to gaussian pyramid image - _convf[i].run(); - } - - for(unsigned int i = 0; i < _num_levels; ++i) - { - // Compute laplacian image - _subf[i].run(); - } - - _depth_function.run(); -} - -void NELaplacianPyramid::configure(const ITensor *input, IPyramid *pyramid, ITensor *output, BorderMode border_mode, uint8_t constant_border_value) -{ - ARM_COMPUTE_ERROR_ON(nullptr == pyramid); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::S16); - ARM_COMPUTE_ERROR_ON(0 == pyramid->info()->num_levels()); - ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != pyramid->info()->width()); - ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) != pyramid->info()->height()); - ARM_COMPUTE_ERROR_ON(output->info()->dimension(0) != pyramid->get_pyramid_level(pyramid->info()->num_levels() - 1)->info()->dimension(0)); - ARM_COMPUTE_ERROR_ON(output->info()->dimension(1) != pyramid->get_pyramid_level(pyramid->info()->num_levels() - 1)->info()->dimension(1)); - - _num_levels = pyramid->info()->num_levels(); - - // Create and initialize the gaussian pyramid and the convoluted pyramid - PyramidInfo pyramid_info; - pyramid_info.init(_num_levels, 0.5f, pyramid->info()->tensor_shape(), arm_compute::Format::U8); - - _gauss_pyr.init(pyramid_info); - _conv_pyr.init(pyramid_info); - - // Create Gaussian Pyramid function - _gaussian_pyr_function.configure(input, &_gauss_pyr, border_mode, constant_border_value); - - _convf.resize(_num_levels); - _subf.resize(_num_levels); - - for(unsigned int i = 0; i < _num_levels; ++i) - { - _convf[i].configure(_gauss_pyr.get_pyramid_level(i), _conv_pyr.get_pyramid_level(i), border_mode, constant_border_value); - _subf[i].configure(_gauss_pyr.get_pyramid_level(i), _conv_pyr.get_pyramid_level(i), pyramid->get_pyramid_level(i), ConvertPolicy::WRAP); - } - - _depth_function.configure(_conv_pyr.get_pyramid_level(_num_levels - 1), output, ConvertPolicy::WRAP, 0); - - _gauss_pyr.allocate(); - _conv_pyr.allocate(); -} -} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NELaplacianReconstruct.cpp b/src/runtime/NEON/functions/NELaplacianReconstruct.cpp deleted file mode 100644 index a50e7ccbef..0000000000 --- a/src/runtime/NEON/functions/NELaplacianReconstruct.cpp +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h" - -#include "arm_compute/core/CPP/ICPPKernel.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/IPyramid.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Validate.h" - -#include - -namespace arm_compute -{ -NELaplacianReconstruct::~NELaplacianReconstruct() = default; - -NELaplacianReconstruct::NELaplacianReconstruct() // NOLINT - : _tmp_pyr(), - _addf(), - _scalef(), - _depthf() -{ -} - -void NELaplacianReconstruct::configure(const IPyramid *pyramid, ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value) -{ - ARM_COMPUTE_ERROR_ON(nullptr == pyramid); - ARM_COMPUTE_ERROR_ON(input == output); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S16); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON(input->info()->num_dimensions() != pyramid->get_pyramid_level(0)->info()->num_dimensions()); - ARM_COMPUTE_ERROR_ON(output->info()->num_dimensions() != pyramid->get_pyramid_level(0)->info()->num_dimensions()); - ARM_COMPUTE_ERROR_ON(output->info()->dimension(0) != pyramid->get_pyramid_level(0)->info()->dimension(0)); - ARM_COMPUTE_ERROR_ON(output->info()->dimension(1) != pyramid->get_pyramid_level(0)->info()->dimension(1)); - ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != pyramid->get_pyramid_level(pyramid->info()->num_levels() - 1)->info()->dimension(0)); - ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) != pyramid->get_pyramid_level(pyramid->info()->num_levels() - 1)->info()->dimension(1)); - - const size_t num_levels = pyramid->info()->num_levels(); - - // Create and initialize the tmp pyramid: I(n-2) = upsample( input + Laplace(n-1) ) - PyramidInfo pyramid_info; - pyramid_info.init(num_levels, 0.5f, output->info()->tensor_shape(), arm_compute::Format::S16); - - _tmp_pyr.init(pyramid_info); - - // Allocate add and scale functions. Level 0 does not need to be scaled. - _addf.resize(num_levels); - _scalef.resize(num_levels - 1); - - const size_t last_level = num_levels - 1; - - _addf[last_level].configure(input, pyramid->get_pyramid_level(last_level), _tmp_pyr.get_pyramid_level(last_level), ConvertPolicy::SATURATE); - - // Scale levels n-1 to 1, and add levels n-2 to 0 - for(size_t l = 0; l < last_level; ++l) - { - _scalef[l].configure(_tmp_pyr.get_pyramid_level(l + 1), _tmp_pyr.get_pyramid_level(l), ScaleKernelInfo{ arm_compute::InterpolationPolicy::NEAREST_NEIGHBOR, border_mode, constant_border_value, SamplingPolicy::CENTER, false }); - _addf[l].configure(_tmp_pyr.get_pyramid_level(l), pyramid->get_pyramid_level(l), _tmp_pyr.get_pyramid_level(l), ConvertPolicy::SATURATE); - } - - // Convert level 0 from S16 to U8 - _depthf.configure(_tmp_pyr.get_pyramid_level(0), output, ConvertPolicy::SATURATE, 0); - - _tmp_pyr.allocate(); -} - -void NELaplacianReconstruct::run() -{ - ARM_COMPUTE_ERROR_ON_MSG(_addf.empty(), "Unconfigured function"); - - const size_t last_level = _tmp_pyr.info()->num_levels() - 1; - - _addf[last_level].run(); - - // Run l = [last_level - 1, 0] - for(size_t l = last_level; l-- > 0;) - { - _scalef[l].run(); - _addf[l].run(); - } - - _depthf.run(); -} -} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEMagnitude.cpp b/src/runtime/NEON/functions/NEMagnitude.cpp deleted file mode 100644 index 34d9a7fb0b..0000000000 --- a/src/runtime/NEON/functions/NEMagnitude.cpp +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NEMagnitude.h" - -#include "arm_compute/core/Types.h" -#include "src/core/NEON/kernels/NEMagnitudePhaseKernel.h" - -#include - -namespace arm_compute -{ -NEMagnitude::~NEMagnitude() = default; - -void NEMagnitude::configure(const ITensor *input1, const ITensor *input2, ITensor *output, MagnitudeType mag_type) -{ - if(mag_type == MagnitudeType::L1NORM) - { - auto k = std::make_unique>(); - k->configure(input1, input2, output, nullptr); - _kernel = std::move(k); - } - else - { - auto k = std::make_unique>(); - k->configure(input1, input2, output, nullptr); - _kernel = std::move(k); - } -} -} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEMeanStdDev.cpp b/src/runtime/NEON/functions/NEMeanStdDev.cpp deleted file mode 100644 index 6e2d7fc81d..0000000000 --- a/src/runtime/NEON/functions/NEMeanStdDev.cpp +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NEMeanStdDev.h" - -#include "arm_compute/runtime/NEON/NEScheduler.h" -#include "src/core/NEON/kernels/NEFillBorderKernel.h" -#include "src/core/NEON/kernels/NEMeanStdDevKernel.h" - -namespace arm_compute -{ -NEMeanStdDev::~NEMeanStdDev() = default; - -NEMeanStdDev::NEMeanStdDev() - : _mean_stddev_kernel(), _fill_border_kernel(), _global_sum(0), _global_sum_squared(0) -{ -} - -void NEMeanStdDev::configure(IImage *input, float *mean, float *stddev) -{ - _mean_stddev_kernel = std::make_unique(); - _fill_border_kernel = std::make_unique(); - - _mean_stddev_kernel->configure(input, mean, &_global_sum, stddev, &_global_sum_squared); - _fill_border_kernel->configure(input, _mean_stddev_kernel->border_size(), BorderMode::CONSTANT, PixelValue(static_cast(0))); -} - -void NEMeanStdDev::run() -{ - _global_sum = 0; - _global_sum_squared = 0; - - NEScheduler::get().schedule(_fill_border_kernel.get(), Window::DimZ); - NEScheduler::get().schedule(_mean_stddev_kernel.get(), Window::DimY); -} -} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEMedian3x3.cpp b/src/runtime/NEON/functions/NEMedian3x3.cpp deleted file mode 100644 index 4d117783ed..0000000000 --- a/src/runtime/NEON/functions/NEMedian3x3.cpp +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NEMedian3x3.h" - -#include "arm_compute/core/PixelValue.h" -#include "src/core/NEON/kernels/NEFillBorderKernel.h" -#include "src/core/NEON/kernels/NEMedian3x3Kernel.h" - -#include - -namespace arm_compute -{ -void NEMedian3x3::configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value) -{ - auto k = std::make_unique(); - k->configure(input, output, border_mode == BorderMode::UNDEFINED); - _kernel = std::move(k); - - auto b = std::make_unique(); - b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); - _border_handler = std::move(b); -} -} // namespace arm_compute \ No newline at end of file diff --git a/src/runtime/NEON/functions/NEMinMaxLocation.cpp b/src/runtime/NEON/functions/NEMinMaxLocation.cpp deleted file mode 100644 index ffbc33bc2e..0000000000 --- a/src/runtime/NEON/functions/NEMinMaxLocation.cpp +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NEMinMaxLocation.h" - -#include "arm_compute/runtime/NEON/NEScheduler.h" -#include "src/core/NEON/kernels/NEMinMaxLocationKernel.h" - -namespace arm_compute -{ -NEMinMaxLocation::~NEMinMaxLocation() = default; - -NEMinMaxLocation::NEMinMaxLocation() - : _min_max(), _min_max_loc() -{ -} - -void NEMinMaxLocation::configure(const IImage *input, void *min, void *max, ICoordinates2DArray *min_loc, ICoordinates2DArray *max_loc, uint32_t *min_count, uint32_t *max_count) -{ - _min_max = std::make_unique(); - _min_max->configure(input, min, max); - - _min_max_loc = std::make_unique(); - _min_max_loc->configure(input, min, max, min_loc, max_loc, min_count, max_count); -} - -void NEMinMaxLocation::run() -{ - _min_max->reset(); - - /* Run min max kernel */ - NEScheduler::get().schedule(_min_max.get(), Window::DimY); - - /* Run min max location */ - NEScheduler::get().schedule(_min_max_loc.get(), Window::DimY); -} -} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NENonLinearFilter.cpp b/src/runtime/NEON/functions/NENonLinearFilter.cpp deleted file mode 100644 index f3acabfa6d..0000000000 --- a/src/runtime/NEON/functions/NENonLinearFilter.cpp +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NENonLinearFilter.h" - -#include "arm_compute/core/PixelValue.h" -#include "src/core/NEON/kernels/NEFillBorderKernel.h" -#include "src/core/NEON/kernels/NENonLinearFilterKernel.h" - -#include - -namespace arm_compute -{ -void NENonLinearFilter::configure(ITensor *input, ITensor *output, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, - BorderMode border_mode, - uint8_t constant_border_value) -{ - auto k = std::make_unique(); - k->configure(input, output, function, mask_size, pattern, mask, border_mode == BorderMode::UNDEFINED); - _kernel = std::move(k); - - auto b = std::make_unique(); - b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); - _border_handler = std::move(b); -} -} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEOpticalFlow.cpp b/src/runtime/NEON/functions/NEOpticalFlow.cpp deleted file mode 100644 index a868208aaf..0000000000 --- a/src/runtime/NEON/functions/NEOpticalFlow.cpp +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NEOpticalFlow.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Window.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" -#include "arm_compute/runtime/NEON/functions/NEScharr3x3.h" -#include "arm_compute/runtime/Pyramid.h" -#include "arm_compute/runtime/Tensor.h" -#include "arm_compute/runtime/TensorAllocator.h" -#include "src/core/NEON/kernels/NEFillBorderKernel.h" -#include "src/core/NEON/kernels/NELKTrackerKernel.h" - -namespace arm_compute -{ -NEOpticalFlow::~NEOpticalFlow() = default; - -NEOpticalFlow::NEOpticalFlow(std::shared_ptr memory_manager) // NOLINT - : _memory_group(std::move(memory_manager)), - _func_scharr(), - _kernel_tracker(), - _scharr_gx(), - _scharr_gy(), - _new_points(nullptr), - _new_points_estimates(nullptr), - _old_points(nullptr), - _new_points_internal(), - _old_points_internal(), - _num_levels(0) -{ -} - -void NEOpticalFlow::configure(const Pyramid *old_pyramid, const Pyramid *new_pyramid, const IKeyPointArray *old_points, const IKeyPointArray *new_points_estimates, - IKeyPointArray *new_points, Termination termination, float epsilon, unsigned int num_iterations, size_t window_dimension, - bool use_initial_estimate, BorderMode border_mode, uint8_t constant_border_value) -{ - ARM_COMPUTE_ERROR_ON(nullptr == old_pyramid); - ARM_COMPUTE_ERROR_ON(nullptr == new_pyramid); - ARM_COMPUTE_ERROR_ON(nullptr == old_points); - ARM_COMPUTE_ERROR_ON(nullptr == new_points_estimates); - ARM_COMPUTE_ERROR_ON(nullptr == new_points); - ARM_COMPUTE_ERROR_ON(old_pyramid->info()->num_levels() != new_pyramid->info()->num_levels()); - ARM_COMPUTE_ERROR_ON(0 == old_pyramid->info()->num_levels()); - ARM_COMPUTE_ERROR_ON(old_pyramid->info()->width() != new_pyramid->info()->width()); - ARM_COMPUTE_ERROR_ON(old_pyramid->info()->height() != new_pyramid->info()->height()); - ARM_COMPUTE_ERROR_ON(use_initial_estimate && old_points->num_values() != new_points_estimates->num_values()); - - _num_levels = old_pyramid->info()->num_levels(); - _old_points = old_points; - _new_points = new_points; - _new_points_estimates = new_points_estimates; - - const float pyr_scale = old_pyramid->info()->scale(); - - _func_scharr.clear(); - _kernel_tracker.clear(); - _scharr_gx.clear(); - _scharr_gy.clear(); - - _func_scharr.resize(_num_levels); - _kernel_tracker.resize(_num_levels); - _scharr_gx.resize(_num_levels); - _scharr_gy.resize(_num_levels); - - _old_points_internal = LKInternalKeypointArray(old_points->num_values()); - _new_points_internal = LKInternalKeypointArray(old_points->num_values()); - _new_points->resize(old_points->num_values()); - - for(unsigned int i = 0; i < _num_levels; ++i) - { - // Get images from the ith level of old and right pyramid - IImage *old_ith_input = old_pyramid->get_pyramid_level(i); - IImage *new_ith_input = new_pyramid->get_pyramid_level(i); - - // Get width and height of images - const unsigned int width_ith = old_ith_input->info()->dimension(0); - const unsigned int height_ith = new_ith_input->info()->dimension(1); - - TensorInfo tensor_info(TensorShape(width_ith, height_ith), Format::S16); - - _scharr_gx[i].allocator()->init(tensor_info); - _scharr_gy[i].allocator()->init(tensor_info); - - // Manage intermediate buffers - _memory_group.manage(&_scharr_gx[i]); - _memory_group.manage(&_scharr_gy[i]); - - // Init Scharr kernel - _func_scharr[i].configure(old_ith_input, &_scharr_gx[i], &_scharr_gy[i], border_mode, constant_border_value); - - // Init Lucas-Kanade kernel - _kernel_tracker[i] = std::make_unique(); - _kernel_tracker[i]->configure(old_ith_input, new_ith_input, &_scharr_gx[i], &_scharr_gy[i], - old_points, new_points_estimates, new_points, - &_old_points_internal, &_new_points_internal, - termination, use_initial_estimate, epsilon, num_iterations, window_dimension, - i, _num_levels, pyr_scale); - - _scharr_gx[i].allocator()->allocate(); - _scharr_gy[i].allocator()->allocate(); - } -} - -void NEOpticalFlow::run() -{ - ARM_COMPUTE_ERROR_ON_MSG(_num_levels == 0, "Unconfigured function"); - - MemoryGroupResourceScope scope_mg(_memory_group); - - for(unsigned int level = _num_levels; level > 0; --level) - { - // Run Scharr kernel - _func_scharr[level - 1].run(); - - // Run Lucas-Kanade kernel - NEScheduler::get().schedule(_kernel_tracker[level - 1].get(), Window::DimX); - } -} -} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEPhase.cpp b/src/runtime/NEON/functions/NEPhase.cpp deleted file mode 100644 index 3b69a10e7f..0000000000 --- a/src/runtime/NEON/functions/NEPhase.cpp +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NEPhase.h" - -#include "src/core/NEON/kernels/NEMagnitudePhaseKernel.h" - -#include - -namespace arm_compute -{ -void NEPhase::configure(const ITensor *input1, const ITensor *input2, ITensor *output, PhaseType phase_type) -{ - if(phase_type == PhaseType::UNSIGNED) - { - auto k = std::make_unique>(); - k->configure(input1, input2, nullptr, output); - _kernel = std::move(k); - } - else - { - auto k = std::make_unique>(); - k->configure(input1, input2, nullptr, output); - _kernel = std::move(k); - } -} -} // namespace arm_compute \ No newline at end of file diff --git a/src/runtime/NEON/functions/NERemap.cpp b/src/runtime/NEON/functions/NERemap.cpp deleted file mode 100644 index f2f57aa599..0000000000 --- a/src/runtime/NEON/functions/NERemap.cpp +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NERemap.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/PixelValue.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/runtime/TensorAllocator.h" -#include "src/core/NEON/kernels/NEFillBorderKernel.h" -#include "src/core/NEON/kernels/NERemapKernel.h" - -#include - -namespace arm_compute -{ -void NERemap::configure(ITensor *input, const ITensor *map_x, const ITensor *map_y, ITensor *output, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(map_x, 1, DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(map_y, 1, DataType::F32); - ARM_COMPUTE_ERROR_ON_MSG(policy == InterpolationPolicy::AREA, "Area interpolation is not supported"); - - auto k = std::make_unique(); - k->configure(input, map_x, map_y, output, policy); - _kernel = std::move(k); - - auto b = std::make_unique(); - b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); - _border_handler = std::move(b); -} -} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEScharr3x3.cpp b/src/runtime/NEON/functions/NEScharr3x3.cpp deleted file mode 100644 index 414e9470ea..0000000000 --- a/src/runtime/NEON/functions/NEScharr3x3.cpp +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NEScharr3x3.h" - -#include "arm_compute/core/PixelValue.h" -#include "src/core/NEON/kernels/NEFillBorderKernel.h" -#include "src/core/NEON/kernels/NEScharr3x3Kernel.h" - -#include - -using namespace arm_compute; - -void NEScharr3x3::configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value) -{ - auto k = std::make_unique(); - k->configure(input, output_x, output_y, border_mode == BorderMode::UNDEFINED); - _kernel = std::move(k); - - auto b = std::make_unique(); - b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); - _border_handler = std::move(b); -} diff --git a/src/runtime/NEON/functions/NESobel3x3.cpp b/src/runtime/NEON/functions/NESobel3x3.cpp deleted file mode 100644 index 1a57bc3fc6..0000000000 --- a/src/runtime/NEON/functions/NESobel3x3.cpp +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NESobel3x3.h" - -#include "arm_compute/core/PixelValue.h" -#include "src/core/NEON/kernels/NEFillBorderKernel.h" -#include "src/core/NEON/kernels/NESobel3x3Kernel.h" - -#include - -namespace arm_compute -{ -void NESobel3x3::configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value) -{ - auto k = std::make_unique(); - k->configure(input, output_x, output_y, border_mode == BorderMode::UNDEFINED); - _kernel = std::move(k); - - auto b = std::make_unique(); - b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); - _border_handler = std::move(b); -} -} // namespace arm_compute \ No newline at end of file diff --git a/src/runtime/NEON/functions/NESobel5x5.cpp b/src/runtime/NEON/functions/NESobel5x5.cpp deleted file mode 100644 index e587981fa9..0000000000 --- a/src/runtime/NEON/functions/NESobel5x5.cpp +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NESobel5x5.h" - -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/PixelValue.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" -#include "arm_compute/runtime/TensorAllocator.h" -#include "src/core/NEON/kernels/NEFillBorderKernel.h" -#include "src/core/NEON/kernels/NESobel5x5Kernel.h" - -namespace arm_compute -{ -NESobel5x5::~NESobel5x5() = default; - -NESobel5x5::NESobel5x5(std::shared_ptr memory_manager) - : _memory_group(std::move(memory_manager)), _sobel_hor(), _sobel_vert(), _tmp_x(), _tmp_y(), _border_handler() -{ -} - -void NESobel5x5::configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - - const bool run_sobel_x = output_x != nullptr; - const bool run_sobel_y = output_y != nullptr; - - TensorInfo tensor_info(input->info()->tensor_shape(), Format::S16); - - _sobel_hor = std::make_unique(); - _sobel_vert = std::make_unique(); - _border_handler = std::make_unique(); - - if(run_sobel_x && run_sobel_y) - { - _tmp_x.allocator()->init(tensor_info); - _tmp_y.allocator()->init(tensor_info); - _memory_group.manage(&_tmp_x); - _memory_group.manage(&_tmp_y); - _sobel_hor->configure(input, &_tmp_x, &_tmp_y, border_mode == BorderMode::UNDEFINED); - _sobel_vert->configure(&_tmp_x, &_tmp_y, output_x, output_y, border_mode == BorderMode::UNDEFINED); - _tmp_x.allocator()->allocate(); - _tmp_y.allocator()->allocate(); - } - else if(run_sobel_x) - { - _tmp_x.allocator()->init(tensor_info); - _memory_group.manage(&_tmp_x); - _sobel_hor->configure(input, &_tmp_x, nullptr, border_mode == BorderMode::UNDEFINED); - _sobel_vert->configure(&_tmp_x, nullptr, output_x, nullptr, border_mode == BorderMode::UNDEFINED); - _tmp_x.allocator()->allocate(); - } - else if(run_sobel_y) - { - _tmp_y.allocator()->init(tensor_info); - _memory_group.manage(&_tmp_y); - _sobel_hor->configure(input, nullptr, &_tmp_y, border_mode == BorderMode::UNDEFINED); - _sobel_vert->configure(nullptr, &_tmp_y, nullptr, output_y, border_mode == BorderMode::UNDEFINED); - _tmp_y.allocator()->allocate(); - } - - _border_handler->configure(input, _sobel_hor->border_size(), border_mode, PixelValue(constant_border_value)); -} - -void NESobel5x5::run() -{ - NEScheduler::get().schedule(_border_handler.get(), Window::DimZ); - - MemoryGroupResourceScope scope_mg(_memory_group); - - NEScheduler::get().schedule(_sobel_hor.get(), Window::DimY); - NEScheduler::get().schedule(_sobel_vert.get(), Window::DimY); -} -} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NESobel7x7.cpp b/src/runtime/NEON/functions/NESobel7x7.cpp deleted file mode 100644 index 7b1a975951..0000000000 --- a/src/runtime/NEON/functions/NESobel7x7.cpp +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NESobel7x7.h" - -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/PixelValue.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" -#include "arm_compute/runtime/TensorAllocator.h" -#include "src/core/NEON/kernels/NEFillBorderKernel.h" -#include "src/core/NEON/kernels/NESobel7x7Kernel.h" - -namespace arm_compute -{ -NESobel7x7::~NESobel7x7() = default; - -NESobel7x7::NESobel7x7(std::shared_ptr memory_manager) - : _memory_group(std::move(memory_manager)), _sobel_hor(), _sobel_vert(), _tmp_x(), _tmp_y(), _border_handler() -{ -} - -void NESobel7x7::configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - - const bool run_sobel_x = output_x != nullptr; - const bool run_sobel_y = output_y != nullptr; - - TensorInfo tensor_info(input->info()->tensor_shape(), Format::S32); - _sobel_hor = std::make_unique(); - _sobel_vert = std::make_unique(); - _border_handler = std::make_unique(); - - if(run_sobel_x && run_sobel_y) - { - _tmp_x.allocator()->init(tensor_info); - _tmp_y.allocator()->init(tensor_info); - _memory_group.manage(&_tmp_x); - _memory_group.manage(&_tmp_y); - _sobel_hor->configure(input, &_tmp_x, &_tmp_y, border_mode == BorderMode::UNDEFINED); - _sobel_vert->configure(&_tmp_x, &_tmp_y, output_x, output_y, border_mode == BorderMode::UNDEFINED); - _tmp_x.allocator()->allocate(); - _tmp_y.allocator()->allocate(); - } - else if(run_sobel_x) - { - _tmp_x.allocator()->init(tensor_info); - _memory_group.manage(&_tmp_x); - _sobel_hor->configure(input, &_tmp_x, nullptr, border_mode == BorderMode::UNDEFINED); - _sobel_vert->configure(&_tmp_x, nullptr, output_x, nullptr, border_mode == BorderMode::UNDEFINED); - _tmp_x.allocator()->allocate(); - } - else if(run_sobel_y) - { - _tmp_y.allocator()->init(tensor_info); - _memory_group.manage(&_tmp_y); - _sobel_hor->configure(input, nullptr, &_tmp_y, border_mode == BorderMode::UNDEFINED); - _sobel_vert->configure(nullptr, &_tmp_y, nullptr, output_y, border_mode == BorderMode::UNDEFINED); - _tmp_y.allocator()->allocate(); - } - - _border_handler->configure(input, _sobel_hor->border_size(), border_mode, PixelValue(constant_border_value)); -} - -void NESobel7x7::run() -{ - NEScheduler::get().schedule(_border_handler.get(), Window::DimZ); - - MemoryGroupResourceScope scope_mg(_memory_group); - - NEScheduler::get().schedule(_sobel_hor.get(), Window::DimY); - NEScheduler::get().schedule(_sobel_vert.get(), Window::DimY); -} -} // namespace arm_compute \ No newline at end of file diff --git a/src/runtime/NEON/functions/NETableLookup.cpp b/src/runtime/NEON/functions/NETableLookup.cpp deleted file mode 100644 index fde3908c81..0000000000 --- a/src/runtime/NEON/functions/NETableLookup.cpp +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NETableLookup.h" - -#include "src/core/NEON/kernels/NETableLookupKernel.h" - -#include - -using namespace arm_compute; - -void NETableLookup::configure(const ITensor *input, const ILut *lut, ITensor *output) -{ - auto k = std::make_unique(); - k->configure(input, lut, output); - _kernel = std::move(k); -} diff --git a/src/runtime/NEON/functions/NEThreshold.cpp b/src/runtime/NEON/functions/NEThreshold.cpp deleted file mode 100644 index 2aa6ea897a..0000000000 --- a/src/runtime/NEON/functions/NEThreshold.cpp +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2016-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NEThreshold.h" - -#include "src/core/NEON/kernels/NEThresholdKernel.h" - -#include - -namespace arm_compute -{ -void NEThreshold::configure(const ITensor *input, ITensor *output, const ThresholdKernelInfo &info) -{ - auto k = std::make_unique(); - k->configure(input, output, info); - _kernel = std::move(k); -} - -Status NEThreshold::validate(const ITensorInfo *input, const ITensorInfo *output, const ThresholdKernelInfo &info) -{ - return NEThresholdKernel::validate(input, output, info); -} -} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEWarpAffine.cpp b/src/runtime/NEON/functions/NEWarpAffine.cpp deleted file mode 100644 index 1e8907b895..0000000000 --- a/src/runtime/NEON/functions/NEWarpAffine.cpp +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NEWarpAffine.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Validate.h" -#include "src/core/NEON/kernels/NEFillBorderKernel.h" -#include "src/core/NEON/kernels/NEWarpKernel.h" - -#include - -using namespace arm_compute; - -void NEWarpAffine::configure(ITensor *input, ITensor *output, const std::array &matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); - - switch(policy) - { - case InterpolationPolicy::NEAREST_NEIGHBOR: - { - auto k = std::make_unique>(); - k->configure(input, output, matrix, border_mode, constant_border_value); - _kernel = std::move(k); - break; - } - case InterpolationPolicy::BILINEAR: - { - auto k = std::make_unique>(); - k->configure(input, output, matrix, border_mode, constant_border_value); - _kernel = std::move(k); - break; - } - case InterpolationPolicy::AREA: - default: - ARM_COMPUTE_ERROR("Interpolation type not supported"); - } - - auto b = std::make_unique(); - b->configure(input, _kernel->border_size(), border_mode, constant_border_value); - _border_handler = std::move(b); -} diff --git a/src/runtime/NEON/functions/NEWarpPerspective.cpp b/src/runtime/NEON/functions/NEWarpPerspective.cpp deleted file mode 100644 index d546da89b8..0000000000 --- a/src/runtime/NEON/functions/NEWarpPerspective.cpp +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NEWarpPerspective.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Validate.h" -#include "src/core/NEON/kernels/NEFillBorderKernel.h" -#include "src/core/NEON/kernels/NEWarpKernel.h" - -#include - -namespace arm_compute -{ -void NEWarpPerspective::configure(ITensor *input, ITensor *output, const std::array &matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); - - switch(policy) - { - case InterpolationPolicy::NEAREST_NEIGHBOR: - { - auto k = std::make_unique>(); - k->configure(input, output, matrix, border_mode, constant_border_value); - _kernel = std::move(k); - break; - } - case InterpolationPolicy::BILINEAR: - { - auto k = std::make_unique>(); - k->configure(input, output, matrix, border_mode, constant_border_value); - _kernel = std::move(k); - break; - } - case InterpolationPolicy::AREA: - default: - ARM_COMPUTE_ERROR("Interpolation type not supported"); - } - - auto b = std::make_unique(); - b->configure(input, _kernel->border_size(), border_mode, constant_border_value); - _border_handler = std::move(b); -} -} // namespace arm_compute diff --git a/tests/validation/NEON/AbsoluteDifference.cpp b/tests/validation/NEON/AbsoluteDifference.cpp deleted file mode 100644 index 09e3ebc378..0000000000 --- a/tests/validation/NEON/AbsoluteDifference.cpp +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) 2017-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h" -#include "arm_compute/runtime/Tensor.h" -#include "arm_compute/runtime/TensorAllocator.h" -#include "tests/NEON/Accessor.h" -#include "tests/PaddingCalculator.h" -#include "tests/datasets/ConvertPolicyDataset.h" -#include "tests/datasets/ShapeDatasets.h" -#include "tests/framework/Asserts.h" -#include "tests/framework/Macros.h" -#include "tests/framework/datasets/Datasets.h" -#include "tests/validation/Validation.h" -#include "tests/validation/fixtures/AbsoluteDifferenceFixture.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace -{ -/** Input data sets **/ -const auto AbsoluteDifferenceU8Dataset = combine(combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U8)), framework::dataset::make("DataType", - DataType::U8)); -const auto AbsoluteDifferenceS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::U8, DataType::S16 }), framework::dataset::make("DataType", DataType::S16)), - framework::dataset::make("DataType", DataType::S16)); -} // namespace - -TEST_SUITE(NEON) -TEST_SUITE(AbsoluteDifference) - -template -using NEAbsoluteDifferenceFixture = AbsoluteDifferenceValidationFixture; - -TEST_SUITE(U8) - -FIXTURE_DATA_TEST_CASE(RunSmall, NEAbsoluteDifferenceFixture, framework::DatasetMode::NIGHTLY, combine(datasets::SmallShapes(), AbsoluteDifferenceU8Dataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} -FIXTURE_DATA_TEST_CASE(RunLarge, NEAbsoluteDifferenceFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), AbsoluteDifferenceU8Dataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} -TEST_SUITE_END() // U8 - -TEST_SUITE(S16) - -FIXTURE_DATA_TEST_CASE(RunSmall, NEAbsoluteDifferenceFixture, framework::DatasetMode::NIGHTLY, combine(datasets::SmallShapes(), AbsoluteDifferenceS16Dataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, NEAbsoluteDifferenceFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), AbsoluteDifferenceS16Dataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} -TEST_SUITE_END() // S16 - -TEST_SUITE_END() // AbsoluteDifference -TEST_SUITE_END() // Neon -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/NEON/Accumulate.cpp b/tests/validation/NEON/Accumulate.cpp deleted file mode 100644 index 3718343a71..0000000000 --- a/tests/validation/NEON/Accumulate.cpp +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Copyright (c) 2017-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/functions/NEAccumulate.h" -#include "arm_compute/runtime/Tensor.h" -#include "arm_compute/runtime/TensorAllocator.h" -#include "tests/NEON/Accessor.h" -#include "tests/PaddingCalculator.h" -#include "tests/datasets/ConvertPolicyDataset.h" -#include "tests/datasets/ShapeDatasets.h" -#include "tests/framework/Asserts.h" -#include "tests/framework/Macros.h" -#include "tests/framework/datasets/Datasets.h" -#include "tests/validation/Validation.h" -#include "tests/validation/fixtures/AccumulateFixture.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace -{ -/** Tolerance value for comparing reference's output against implementation's output for floating point data types */ -constexpr AbsoluteTolerance tolerance(1.0f); -/** Input data sets **/ -const auto AccumulateU8Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U8)); -const auto AccumulateS16Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S16)); -} // namespace -TEST_SUITE(NEON) -TEST_SUITE(Accumulate) - -TEST_SUITE(U8) - -template -using NEAccumulateFixture = AccumulateValidationFixture; - -FIXTURE_DATA_TEST_CASE(RunSmall, NEAccumulateFixture, framework::DatasetMode::NIGHTLY, combine(datasets::SmallShapes(), AccumulateS16Dataset)) -{ - // Validate output - validate(Accessor(_target), _reference, tolerance); -} -FIXTURE_DATA_TEST_CASE(RunLarge, NEAccumulateFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), AccumulateS16Dataset)) -{ - // Validate output - validate(Accessor(_target), _reference, tolerance); -} - -TEST_SUITE_END() // U8 -TEST_SUITE_END() // Accumulate - -TEST_SUITE(AccumulateWeighted) - -TEST_SUITE(U8) - -template -using NEAccumulateWeightedFixture = AccumulateWeightedValidationFixture; - -FIXTURE_DATA_TEST_CASE(RunSmall, NEAccumulateWeightedFixture, framework::DatasetMode::NIGHTLY, combine(datasets::SmallShapes(), AccumulateU8Dataset)) -{ - // Validate output - validate(Accessor(_target), _reference, tolerance); -} -FIXTURE_DATA_TEST_CASE(RunLarge, NEAccumulateWeightedFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), AccumulateU8Dataset)) -{ - // Validate output - validate(Accessor(_target), _reference, tolerance); -} - -TEST_SUITE_END() // U8 -TEST_SUITE_END() // AccumulateWeighted - -TEST_SUITE(AccumulateSquared) - -TEST_SUITE(U8) - -template -using NEAccumulateSquaredFixture = AccumulateSquaredValidationFixture; - -FIXTURE_DATA_TEST_CASE(RunSmall, NEAccumulateSquaredFixture, framework::DatasetMode::NIGHTLY, combine(datasets::SmallShapes(), AccumulateS16Dataset)) -{ - // Validate output - validate(Accessor(_target), _reference, tolerance); -} -FIXTURE_DATA_TEST_CASE(RunLarge, NEAccumulateSquaredFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), AccumulateS16Dataset)) -{ - // Validate output - validate(Accessor(_target), _reference, tolerance); -} - -TEST_SUITE_END() // U8 -TEST_SUITE_END() // AccumulateSquared - -TEST_SUITE_END() // Neon -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/NEON/Box3x3.cpp b/tests/validation/NEON/Box3x3.cpp deleted file mode 100644 index 97122f1fe2..0000000000 --- a/tests/validation/NEON/Box3x3.cpp +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/functions/NEBox3x3.h" -#include "arm_compute/runtime/Tensor.h" -#include "arm_compute/runtime/TensorAllocator.h" -#include "tests/NEON/Accessor.h" -#include "tests/PaddingCalculator.h" -#include "tests/datasets/BorderModeDataset.h" -#include "tests/datasets/ShapeDatasets.h" -#include "tests/framework/Asserts.h" -#include "tests/framework/Macros.h" -#include "tests/framework/datasets/Datasets.h" -#include "tests/validation/Validation.h" -#include "tests/validation/fixtures/Box3x3Fixture.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace -{ -constexpr unsigned int filter_size = 3; /* Size of the kernel/filter in number of elements. */ -constexpr BorderSize border_size(filter_size / 2); /* Border size of the kernel/filter around its central element. */ -} // namespace - -TEST_SUITE(NEON) -TEST_SUITE(Box3x3) - -template -using NEBox3x3Fixture = Box3x3ValidationFixture; - -FIXTURE_DATA_TEST_CASE(RunSmall, NEBox3x3Fixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", - DataType::U8)), - datasets::BorderModes())) -{ - // Validate output - validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), border_size)); -} -FIXTURE_DATA_TEST_CASE(RunLarge, NEBox3x3Fixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", - DataType::U8)), - datasets::BorderModes())) -{ - // Validate output - validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), border_size)); -} - -TEST_SUITE_END() -TEST_SUITE_END() -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/NEON/CannyEdge.cpp b/tests/validation/NEON/CannyEdge.cpp deleted file mode 100644 index ed46ec16b6..0000000000 --- a/tests/validation/NEON/CannyEdge.cpp +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/functions/NECannyEdge.h" -#include "arm_compute/runtime/Tensor.h" -#include "arm_compute/runtime/TensorAllocator.h" -#include "tests/NEON/Accessor.h" -#include "tests/NEON/ArrayAccessor.h" -#include "tests/PaddingCalculator.h" -#include "tests/datasets/BorderModeDataset.h" -#include "tests/datasets/ImageFileDatasets.h" -#include "tests/datasets/ShapeDatasets.h" -#include "tests/framework/Asserts.h" -#include "tests/framework/Macros.h" -#include "tests/framework/datasets/Datasets.h" -#include "tests/validation/Validation.h" -#include "tests/validation/fixtures/CannyEdgeFixture.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace -{ -/* Allowed ratio of mismatches between target and reference (1.0 = 100%) */ -const float allowed_mismatch_ratio = 0.1f; - -const auto data = combine(framework::dataset::make("GradientSize", { 3, 5, 7 }), - combine(framework::dataset::make("Normalization", { MagnitudeType::L1NORM, MagnitudeType::L2NORM }), datasets::BorderModes())); -} // namespace - -TEST_SUITE(NEON) -TEST_SUITE(CannyEdge) - -template -using NECannyEdgeFixture = CannyEdgeValidationFixture; - -FIXTURE_DATA_TEST_CASE(RunSmall, NECannyEdgeFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SmallImageFiles(), data), framework::dataset::make("Format", Format::U8))) -{ - // Validate output - validate(Accessor(_target), _reference, AbsoluteTolerance(0), allowed_mismatch_ratio); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, NECannyEdgeFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeImageFiles(), data), framework::dataset::make("Format", Format::U8))) -{ - // Validate output - validate(Accessor(_target), _reference, AbsoluteTolerance(0), allowed_mismatch_ratio); -} - -TEST_SUITE_END() -TEST_SUITE_END() -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/NEON/ChannelCombine.cpp b/tests/validation/NEON/ChannelCombine.cpp deleted file mode 100644 index 924119e5bb..0000000000 --- a/tests/validation/NEON/ChannelCombine.cpp +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Copyright (c) 2017-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/MultiImage.h" -#include "arm_compute/runtime/NEON/functions/NEChannelCombine.h" -#include "arm_compute/runtime/Tensor.h" -#include "arm_compute/runtime/TensorAllocator.h" -#include "tests/NEON/Accessor.h" -#include "tests/PaddingCalculator.h" -#include "tests/datasets/ConvertPolicyDataset.h" -#include "tests/datasets/ShapeDatasets.h" -#include "tests/framework/Asserts.h" -#include "tests/framework/Macros.h" -#include "tests/framework/datasets/Datasets.h" -#include "tests/validation/Validation.h" -#include "tests/validation/fixtures/ChannelCombineFixture.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -TEST_SUITE(NEON) -TEST_SUITE(ChannelCombine) - -template -using NEChannelCombineFixture = ChannelCombineValidationFixture; - -TEST_SUITE(RGBA) -FIXTURE_DATA_TEST_CASE(RunSmall, NEChannelCombineFixture, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), framework::dataset::make("FormatType", { Format::RGB888, Format::RGBA8888 }))) -{ - // Validate output - for(unsigned int plane_idx = 0; plane_idx < _num_planes; ++plane_idx) - { - validate(Accessor(*_target.plane(plane_idx)), _reference[plane_idx]); - } -} -FIXTURE_DATA_TEST_CASE(RunLarge, NEChannelCombineFixture, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), framework::dataset::make("FormatType", { Format::RGB888, Format::RGBA8888 }))) -{ - // Validate output - for(unsigned int plane_idx = 0; plane_idx < _num_planes; ++plane_idx) - { - validate(Accessor(*_target.plane(plane_idx)), _reference[plane_idx]); - } -} -TEST_SUITE_END() // RGBA - -TEST_SUITE(YUV) -FIXTURE_DATA_TEST_CASE(RunSmall, NEChannelCombineFixture, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), framework::dataset::make("FormatType", { Format::YUYV422, Format::UYVY422 }))) -{ - // Validate output - for(unsigned int plane_idx = 0; plane_idx < _num_planes; ++plane_idx) - { - validate(Accessor(*_target.plane(plane_idx)), _reference[plane_idx]); - } -} -FIXTURE_DATA_TEST_CASE(RunLarge, NEChannelCombineFixture, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), framework::dataset::make("FormatType", { Format::YUYV422, Format::UYVY422 }))) -{ - // Validate output - for(unsigned int plane_idx = 0; plane_idx < _num_planes; ++plane_idx) - { - validate(Accessor(*_target.plane(plane_idx)), _reference[plane_idx]); - } -} -TEST_SUITE_END() // YUV - -TEST_SUITE(YUVPlanar) -FIXTURE_DATA_TEST_CASE(RunSmall, NEChannelCombineFixture, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), framework::dataset::make("FormatType", { Format::NV12, Format::NV21, Format::IYUV, Format::YUV444 }))) -{ - // Validate output - for(unsigned int plane_idx = 0; plane_idx < _num_planes; ++plane_idx) - { - validate(Accessor(*_target.plane(plane_idx)), _reference[plane_idx]); - } -} -FIXTURE_DATA_TEST_CASE(RunLarge, NEChannelCombineFixture, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), framework::dataset::make("FormatType", { Format::NV12, Format::NV21, Format::IYUV, Format::YUV444 }))) -{ - // Validate output - for(unsigned int plane_idx = 0; plane_idx < _num_planes; ++plane_idx) - { - validate(Accessor(*_target.plane(plane_idx)), _reference[plane_idx]); - } -} -TEST_SUITE_END() // YUVPlanar - -TEST_SUITE_END() // ChannelCombine -TEST_SUITE_END() // Neon -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/NEON/ChannelExtract.cpp b/tests/validation/NEON/ChannelExtract.cpp deleted file mode 100644 index ba639e4f88..0000000000 --- a/tests/validation/NEON/ChannelExtract.cpp +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2017-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/MultiImage.h" -#include "arm_compute/runtime/NEON/functions/NEChannelExtract.h" -#include "arm_compute/runtime/Tensor.h" -#include "arm_compute/runtime/TensorAllocator.h" -#include "tests/NEON/Accessor.h" -#include "tests/PaddingCalculator.h" -#include "tests/datasets/ConvertPolicyDataset.h" -#include "tests/datasets/ShapeDatasets.h" -#include "tests/framework/Asserts.h" -#include "tests/framework/Macros.h" -#include "tests/framework/datasets/Datasets.h" -#include "tests/validation/Validation.h" -#include "tests/validation/fixtures/ChannelExtractFixture.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace -{ -// Input data sets -const auto ChannelExtractRGBADataset = combine(framework::dataset::make("FormatType", { Format::RGBA8888 }), - framework::dataset::make("ChannelType", { Channel::R, Channel::G, Channel::B, Channel::A })); -const auto ChannelExtractYUVDataset = combine(framework::dataset::make("FormatType", { Format::YUYV422, Format::UYVY422 }), - framework::dataset::make("ChannelType", { Channel::Y, Channel::U, Channel::V })); -const auto ChannelExtractYUVPlanarDataset = combine(framework::dataset::make("FormatType", { Format::IYUV, Format::YUV444, Format::NV12, Format::NV21 }), - framework::dataset::make("ChannelType", { Channel::Y, Channel::U, Channel::V })); -} // namespace - -TEST_SUITE(NEON) -TEST_SUITE(ChannelExtract) - -template -using NEChannelExtractFixture = ChannelExtractValidationFixture; - -TEST_SUITE(RGBA) -FIXTURE_DATA_TEST_CASE(RunSmall, NEChannelExtractFixture, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), ChannelExtractRGBADataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} -FIXTURE_DATA_TEST_CASE(RunLarge, NEChannelExtractFixture, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), ChannelExtractRGBADataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} -TEST_SUITE_END() // RGBA - -TEST_SUITE(YUV) -FIXTURE_DATA_TEST_CASE(RunSmall, NEChannelExtractFixture, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), ChannelExtractYUVDataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} -FIXTURE_DATA_TEST_CASE(RunLarge, NEChannelExtractFixture, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), ChannelExtractYUVDataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} -TEST_SUITE_END() // YUV - -TEST_SUITE(YUVPlanar) -FIXTURE_DATA_TEST_CASE(RunSmall, NEChannelExtractFixture, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), ChannelExtractYUVPlanarDataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} -FIXTURE_DATA_TEST_CASE(RunLarge, NEChannelExtractFixture, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), ChannelExtractYUVPlanarDataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} -TEST_SUITE_END() // YUVPlanar - -TEST_SUITE_END() // ChannelExtract -TEST_SUITE_END() // Neon - -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/NEON/ColorConvert.cpp b/tests/validation/NEON/ColorConvert.cpp deleted file mode 100644 index 9e2d5b70b3..0000000000 --- a/tests/validation/NEON/ColorConvert.cpp +++ /dev/null @@ -1,240 +0,0 @@ -/* - * Copyright (c) 2017-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/MultiImage.h" -#include "arm_compute/runtime/NEON/functions/NEColorConvert.h" -#include "arm_compute/runtime/Tensor.h" -#include "arm_compute/runtime/TensorAllocator.h" -#include "tests/NEON/Accessor.h" -#include "tests/datasets/ShapeDatasets.h" -#include "tests/framework/Asserts.h" -#include "tests/framework/Macros.h" -#include "tests/validation/Validation.h" -#include "tests/validation/fixtures/ColorConvertFixture.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace -{ -constexpr AbsoluteTolerance tolerance_nv(2); -constexpr AbsoluteTolerance tolerance_u8(2); - -// Input data sets -const auto RGBDataset = framework::dataset::make("FormatType", { Format::RGB888, Format::RGBA8888 }); -const auto YUYVDataset = framework::dataset::make("FormatType", { Format::YUYV422, Format::UYVY422 }); - -const auto ColorConvert_RGBA_to_RGB = combine(framework::dataset::make("FormatType", { Format::RGBA8888 }), - framework::dataset::make("FormatType", { Format::RGB888 })); - -const auto ColorConvert_RGB_to_RGBA = combine(framework::dataset::make("FormatType", { Format::RGB888 }), - framework::dataset::make("FormatType", { Format::RGBA8888 })); - -const auto ColorConvert_RGB_to_U8 = combine(framework::dataset::make("FormatType", { Format::RGB888 }), - framework::dataset::make("FormatType", { Format::U8 })); - -const auto ColorConvert_YUYVDataset_to_RGBDataset = combine(YUYVDataset, - RGBDataset); - -const auto ColorConvert_YUVPlanar_to_RGBDataset = combine(framework::dataset::make("FormatType", { Format::IYUV, Format::NV12, Format::NV21 }), - RGBDataset); - -const auto ColorConvert_RGBDataset_to_NVDataset = combine(RGBDataset, - framework::dataset::make("FormatType", { Format::NV12, Format::IYUV, Format::YUV444 })); - -const auto ColorConvert_YUYVDataset_to_NVDataset = combine(YUYVDataset, - framework::dataset::make("FormatType", { Format::NV12, Format::IYUV })); - -const auto ColorConvert_NVDataset_to_YUVDataset = combine(framework::dataset::make("FormatType", { Format::NV12, Format::NV21 }), - framework::dataset::make("FormatType", { Format::IYUV, Format::YUV444 })); -} // namespace - -TEST_SUITE(NEON) -TEST_SUITE(ColorConvert) - -template -using NEColorConvertFixture = ColorConvertValidationFixture; - -TEST_SUITE(RGBA) -FIXTURE_DATA_TEST_CASE(RunSmall, NEColorConvertFixture, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), ColorConvert_RGBA_to_RGB)) -{ - // Validate output - for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx) - { - validate(Accessor(*_target.plane(plane_idx)), _reference[plane_idx]); - } -} - -FIXTURE_DATA_TEST_CASE(RunLarge, NEColorConvertFixture, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), ColorConvert_RGBA_to_RGB)) -{ - // Validate output - for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx) - { - validate(Accessor(*_target.plane(plane_idx)), _reference[plane_idx]); - } -} -TEST_SUITE_END() // RGBA - -TEST_SUITE(RGB) -FIXTURE_DATA_TEST_CASE(RunSmall, NEColorConvertFixture, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), ColorConvert_RGB_to_RGBA)) -{ - // Validate output - for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx) - { - validate(Accessor(*_target.plane(plane_idx)), _reference[plane_idx]); - } -} -FIXTURE_DATA_TEST_CASE(RunLarge, NEColorConvertFixture, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), ColorConvert_RGB_to_RGBA)) -{ - // Validate output - for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx) - { - validate(Accessor(*_target.plane(plane_idx)), _reference[plane_idx]); - } -} -TEST_SUITE_END() // RGB - -TEST_SUITE(RGBtoU8) -FIXTURE_DATA_TEST_CASE(RunSmall, NEColorConvertFixture, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), ColorConvert_RGB_to_U8)) -{ - // Validate output - for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx) - { - validate(Accessor(*_target.plane(plane_idx)), _reference[plane_idx], tolerance_u8); - } -} -FIXTURE_DATA_TEST_CASE(RunLarge, NEColorConvertFixture, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), ColorConvert_RGB_to_U8)) -{ - // Validate output - for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx) - { - validate(Accessor(*_target.plane(plane_idx)), _reference[plane_idx], tolerance_u8); - } -} -TEST_SUITE_END() // RGBtoU8 - -TEST_SUITE(YUV) -FIXTURE_DATA_TEST_CASE(RunSmall, NEColorConvertFixture, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), ColorConvert_YUYVDataset_to_RGBDataset)) -{ - // Validate output - for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx) - { - validate(Accessor(*_target.plane(plane_idx)), _reference[plane_idx]); - } -} -FIXTURE_DATA_TEST_CASE(RunLarge, NEColorConvertFixture, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), ColorConvert_YUYVDataset_to_RGBDataset)) -{ - // Validate output - for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx) - { - validate(Accessor(*_target.plane(plane_idx)), _reference[plane_idx]); - } -} -TEST_SUITE_END() // YUV - -TEST_SUITE(YUVPlanar) -FIXTURE_DATA_TEST_CASE(RunSmall, NEColorConvertFixture, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), ColorConvert_YUVPlanar_to_RGBDataset)) -{ - // Validate output - for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx) - { - validate(Accessor(*_target.plane(plane_idx)), _reference[plane_idx]); - } -} -FIXTURE_DATA_TEST_CASE(RunLarge, NEColorConvertFixture, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), ColorConvert_YUVPlanar_to_RGBDataset)) -{ - // Validate output - for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx) - { - validate(Accessor(*_target.plane(plane_idx)), _reference[plane_idx]); - } -} -TEST_SUITE_END() // YUVPlanar - -TEST_SUITE(NV) -FIXTURE_DATA_TEST_CASE(RunSmall, NEColorConvertFixture, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), ColorConvert_RGBDataset_to_NVDataset)) -{ - // Validate output - for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx) - { - validate(Accessor(*_target.plane(plane_idx)), _reference[plane_idx], tolerance_nv); - } -} - -FIXTURE_DATA_TEST_CASE(RunLarge, NEColorConvertFixture, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), ColorConvert_RGBDataset_to_NVDataset)) -{ - // Validate output - for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx) - { - validate(Accessor(*_target.plane(plane_idx)), _reference[plane_idx], tolerance_nv); - } -} -TEST_SUITE_END() // NV - -TEST_SUITE(YUYVtoNV) -FIXTURE_DATA_TEST_CASE(RunSmall, NEColorConvertFixture, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), ColorConvert_YUYVDataset_to_NVDataset)) -{ - // Validate output - for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx) - { - validate(Accessor(*_target.plane(plane_idx)), _reference[plane_idx]); - } -} - -FIXTURE_DATA_TEST_CASE(RunLarge, NEColorConvertFixture, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), ColorConvert_YUYVDataset_to_NVDataset)) -{ - // Validate output - for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx) - { - validate(Accessor(*_target.plane(plane_idx)), _reference[plane_idx]); - } -} -TEST_SUITE_END() // YUYVtoNV - -TEST_SUITE(NVtoYUV) -FIXTURE_DATA_TEST_CASE(RunSmall, NEColorConvertFixture, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), ColorConvert_NVDataset_to_YUVDataset)) -{ - // Validate output - for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx) - { - validate(Accessor(*_target.plane(plane_idx)), _reference[plane_idx]); - } -} -FIXTURE_DATA_TEST_CASE(RunLarge, NEColorConvertFixture, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), ColorConvert_NVDataset_to_YUVDataset)) -{ - // Validate output - for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx) - { - validate(Accessor(*_target.plane(plane_idx)), _reference[plane_idx]); - } -} -TEST_SUITE_END() // NVtoYUV - -TEST_SUITE_END() // ColorConvert -TEST_SUITE_END() // Neon -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/NEON/Derivative.cpp b/tests/validation/NEON/Derivative.cpp deleted file mode 100644 index b118f2fb88..0000000000 --- a/tests/validation/NEON/Derivative.cpp +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2017-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/NEON/functions/NEDerivative.h" -#include "tests/NEON/Accessor.h" -#include "tests/PaddingCalculator.h" -#include "tests/datasets/BorderModeDataset.h" -#include "tests/datasets/GradientDimensionDataset.h" -#include "tests/datasets/ShapeDatasets.h" -#include "tests/framework/Macros.h" -#include "tests/framework/datasets/Datasets.h" -#include "tests/validation/Validation.h" -#include "tests/validation/fixtures/DerivativeFixture.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -TEST_SUITE(NEON) -TEST_SUITE(Derivative) - -using NEDerivativeFixture = DerivativeValidationFixture; - -FIXTURE_DATA_TEST_CASE(RunSmall, NEDerivativeFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format", - Format::U8)), - datasets::GradientDimensions())) -{ - // Validate output - ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1)); - validate(Accessor(_target.first), _reference.first, valid_region_x); - - ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1)); - validate(Accessor(_target.second), _reference.second, valid_region_y); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, NEDerivativeFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format", - Format::U8)), - datasets::GradientDimensions())) -{ - // Validate output - ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1)); - validate(Accessor(_target.first), _reference.first, valid_region_x); - - ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1)); - validate(Accessor(_target.second), _reference.second, valid_region_y); -} - -TEST_SUITE_END() // Derivative -TEST_SUITE_END() // Neon -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/NEON/Dilate.cpp b/tests/validation/NEON/Dilate.cpp deleted file mode 100644 index 362e4eb4cd..0000000000 --- a/tests/validation/NEON/Dilate.cpp +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2017-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/functions/NEDilate.h" -#include "arm_compute/runtime/Tensor.h" -#include "arm_compute/runtime/TensorAllocator.h" -#include "tests/NEON/Accessor.h" -#include "tests/PaddingCalculator.h" -#include "tests/datasets/BorderModeDataset.h" -#include "tests/datasets/ShapeDatasets.h" -#include "tests/framework/Asserts.h" -#include "tests/framework/Macros.h" -#include "tests/framework/datasets/Datasets.h" -#include "tests/validation/Validation.h" -#include "tests/validation/fixtures/DilateFixture.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace -{ -constexpr unsigned int filter_size = 3; /* Size of the kernel/filter in number of elements. */ -constexpr BorderSize border_size(filter_size / 2); /* Border size of the kernel/filter around its central element. */ -} // namespace - -TEST_SUITE(NEON) -TEST_SUITE(Dilate) - -template -using NEDilateFixture = DilateValidationFixture; - -FIXTURE_DATA_TEST_CASE(RunSmall, NEDilateFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", - DataType::U8)), - datasets::BorderModes())) -{ - // Validate output - validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), border_size)); -} -FIXTURE_DATA_TEST_CASE(RunLarge, NEDilateFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", - DataType::U8)), - datasets::BorderModes())) -{ - // Validate output - validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), border_size)); -} - -TEST_SUITE_END() // Dilate -TEST_SUITE_END() // Neon -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/NEON/EqualizeHistogram.cpp b/tests/validation/NEON/EqualizeHistogram.cpp deleted file mode 100644 index b781b01714..0000000000 --- a/tests/validation/NEON/EqualizeHistogram.cpp +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h" -#include "tests/NEON/Accessor.h" -#include "tests/PaddingCalculator.h" -#include "tests/datasets/ShapeDatasets.h" -#include "tests/framework/Macros.h" -#include "tests/validation/Validation.h" -#include "tests/validation/fixtures/EqualizeHistogramFixture.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -TEST_SUITE(NEON) -TEST_SUITE(EqualizeHistogram) -template -using NEEqualizeHistogramFixture = EqualizeHistogramValidationFixture; - -FIXTURE_DATA_TEST_CASE(RunSmall, NEEqualizeHistogramFixture, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), framework::dataset::make("DataType", - DataType::U8))) -{ - // Validate output - validate(Accessor(_target), _reference); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, NEEqualizeHistogramFixture, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), framework::dataset::make("DataType", - DataType::U8))) -{ - // Validate output - validate(Accessor(_target), _reference); -} - -TEST_SUITE_END() -TEST_SUITE_END() -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/NEON/Erode.cpp b/tests/validation/NEON/Erode.cpp deleted file mode 100644 index aca579b0e3..0000000000 --- a/tests/validation/NEON/Erode.cpp +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/functions/NEErode.h" -#include "arm_compute/runtime/Tensor.h" -#include "arm_compute/runtime/TensorAllocator.h" -#include "tests/NEON/Accessor.h" -#include "tests/PaddingCalculator.h" -#include "tests/datasets/BorderModeDataset.h" -#include "tests/datasets/ShapeDatasets.h" -#include "tests/framework/Asserts.h" -#include "tests/framework/Macros.h" -#include "tests/framework/datasets/Datasets.h" -#include "tests/validation/Validation.h" -#include "tests/validation/fixtures/ErodeFixture.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace -{ -constexpr unsigned int filter_size = 3; /* Size of the kernel/filter in number of elements. */ -constexpr BorderSize border_size(filter_size / 2); /* Border size of the kernel/filter around its central element. */ -} // namespace - -TEST_SUITE(NEON) -TEST_SUITE(Erode) - -template -using NEErodeFixture = ErodeValidationFixture; - -FIXTURE_DATA_TEST_CASE(RunSmall, NEErodeFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", - DataType::U8)), - datasets::BorderModes())) -{ - // Validate output - validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), border_size)); -} -FIXTURE_DATA_TEST_CASE(RunLarge, NEErodeFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", - DataType::U8)), - datasets::BorderModes())) -{ - // Validate output - validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), border_size)); -} - -TEST_SUITE_END() -TEST_SUITE_END() -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/NEON/FastCorners.cpp b/tests/validation/NEON/FastCorners.cpp deleted file mode 100644 index 107702c6e7..0000000000 --- a/tests/validation/NEON/FastCorners.cpp +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/functions/NEFastCorners.h" -#include "arm_compute/runtime/Tensor.h" -#include "arm_compute/runtime/TensorAllocator.h" -#include "tests/NEON/Accessor.h" -#include "tests/NEON/ArrayAccessor.h" -#include "tests/PaddingCalculator.h" -#include "tests/datasets/ImageFileDatasets.h" -#include "tests/datasets/ShapeDatasets.h" -#include "tests/framework/Asserts.h" -#include "tests/framework/Macros.h" -#include "tests/framework/datasets/Datasets.h" -#include "tests/validation/Validation.h" -#include "tests/validation/fixtures/FastCornersFixture.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace -{ -/* Tolerance used to compare corner strengths */ -const AbsoluteTolerance tolerance(0.5f); -} // namespace - -TEST_SUITE(NEON) -TEST_SUITE(FastCorners) - -template -using NEFastCornersFixture = FastCornersValidationFixture; - -FIXTURE_DATA_TEST_CASE(RunSmall, NEFastCornersFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallImageFiles(), framework::dataset::make("Format", Format::U8)), - framework::dataset::make("SuppressNonMax", { false, true })), - framework::dataset::make("BorderMode", BorderMode::UNDEFINED))) -{ - // Validate output - ArrayAccessor array(_target); - validate_keypoints(array.buffer(), array.buffer() + array.num_values(), _reference.begin(), _reference.end(), tolerance); -} -FIXTURE_DATA_TEST_CASE(RunLarge, NEFastCornersFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeImageFiles(), framework::dataset::make("Format", Format::U8)), - framework::dataset::make("SuppressNonMax", { false, true })), - framework::dataset::make("BorderMode", BorderMode::UNDEFINED))) -{ - // Validate output - ArrayAccessor array(_target); - validate_keypoints(array.buffer(), array.buffer() + array.num_values(), _reference.begin(), _reference.end(), tolerance); -} - -TEST_SUITE_END() -TEST_SUITE_END() -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/NEON/Gaussian3x3.cpp b/tests/validation/NEON/Gaussian3x3.cpp deleted file mode 100644 index efc555e279..0000000000 --- a/tests/validation/NEON/Gaussian3x3.cpp +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/functions/NEGaussian3x3.h" -#include "arm_compute/runtime/Tensor.h" -#include "arm_compute/runtime/TensorAllocator.h" -#include "tests/NEON/Accessor.h" -#include "tests/PaddingCalculator.h" -#include "tests/datasets/BorderModeDataset.h" -#include "tests/datasets/ShapeDatasets.h" -#include "tests/framework/Asserts.h" -#include "tests/framework/Macros.h" -#include "tests/framework/datasets/Datasets.h" -#include "tests/validation/Validation.h" -#include "tests/validation/fixtures/Gaussian3x3Fixture.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace -{ -constexpr unsigned int filter_size = 3; /** Size of the kernel/filter in number of elements. */ -constexpr BorderSize border_size(filter_size / 2); /** Border size of the kernel/filter around its central element. */ -} // namespace - -TEST_SUITE(NEON) -TEST_SUITE(Gaussian3x3) - -template -using NEGaussian3x3Fixture = Gaussian3x3ValidationFixture; - -FIXTURE_DATA_TEST_CASE(RunSmall, NEGaussian3x3Fixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", - DataType::U8)), - datasets::BorderModes())) -{ - // Validate output - validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), border_size)); -} -FIXTURE_DATA_TEST_CASE(RunLarge, NEGaussian3x3Fixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", - DataType::U8)), - datasets::BorderModes())) -{ - // Validate output - validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), border_size)); -} - -TEST_SUITE_END() -TEST_SUITE_END() -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/NEON/Gaussian5x5.cpp b/tests/validation/NEON/Gaussian5x5.cpp deleted file mode 100644 index 6352333f12..0000000000 --- a/tests/validation/NEON/Gaussian5x5.cpp +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h" -#include "arm_compute/runtime/Tensor.h" -#include "arm_compute/runtime/TensorAllocator.h" -#include "tests/NEON/Accessor.h" -#include "tests/PaddingCalculator.h" -#include "tests/datasets/BorderModeDataset.h" -#include "tests/datasets/ShapeDatasets.h" -#include "tests/framework/Asserts.h" -#include "tests/framework/Macros.h" -#include "tests/framework/datasets/Datasets.h" -#include "tests/validation/Validation.h" -#include "tests/validation/fixtures/Gaussian5x5Fixture.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace -{ -constexpr unsigned int filter_size = 5; /** Size of the kernel/filter in number of elements. */ -constexpr BorderSize border_size(filter_size / 2); /** Border size of the kernel/filter around its central element. */ -} // namespace - -TEST_SUITE(NEON) -TEST_SUITE(Gaussian5x5) - -template -using NEGaussian5x5Fixture = Gaussian5x5ValidationFixture; - -FIXTURE_DATA_TEST_CASE(RunSmall, NEGaussian5x5Fixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", - DataType::U8)), - datasets::BorderModes())) -{ - // Validate output - validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), border_size)); -} -FIXTURE_DATA_TEST_CASE(RunLarge, NEGaussian5x5Fixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", - DataType::U8)), - datasets::BorderModes())) -{ - // Validate output - validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), border_size)); -} - -TEST_SUITE_END() -TEST_SUITE_END() -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/NEON/GaussianPyramid.cpp b/tests/validation/NEON/GaussianPyramid.cpp deleted file mode 100644 index 9639cbfd91..0000000000 --- a/tests/validation/NEON/GaussianPyramid.cpp +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/functions/NEGaussianPyramid.h" -#include "arm_compute/runtime/Tensor.h" -#include "arm_compute/runtime/TensorAllocator.h" -#include "tests/NEON/Accessor.h" -#include "tests/PaddingCalculator.h" -#include "tests/datasets/BorderModeDataset.h" -#include "tests/datasets/ShapeDatasets.h" -#include "tests/framework/Asserts.h" -#include "tests/framework/Macros.h" -#include "tests/framework/datasets/Datasets.h" -#include "tests/validation/Validation.h" -#include "tests/validation/fixtures/GaussianPyramidHalfFixture.h" -#include "tests/validation/reference/Utils.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace -{ -const auto small_gaussian_pyramid_levels = combine(datasets::Medium2DShapes(), datasets::BorderModes()) * framework::dataset::make("numlevels", 2, 4); -const auto large_gaussian_pyramid_levels = combine(datasets::Large2DShapes(), datasets::BorderModes()) * framework::dataset::make("numlevels", 2, 5); - -template -inline void validate_gaussian_pyramid(const Pyramid &target, const std::vector> &reference, BorderMode border_mode) -{ - ValidRegion prev_valid_region = shape_to_valid_region(reference[0].shape()); - - for(size_t i = 1; i < reference.size(); ++i) - { - const ValidRegion valid_region = shape_to_valid_region_gaussian_pyramid_half(reference[i - 1].shape(), prev_valid_region, (border_mode == BorderMode::UNDEFINED)); - - // Validate outputs - validate(Accessor(*(target.get_pyramid_level(i))), reference[i], valid_region); - - // Keep the valid region for the next level - prev_valid_region = valid_region; - } -} -} // namespace - -TEST_SUITE(NEON) -TEST_SUITE(GaussianPyramid) -TEST_SUITE(Half) - -template -using NEGaussianPyramidHalfFixture = GaussianPyramidHalfValidationFixture; - -FIXTURE_DATA_TEST_CASE(RunSmallGaussianPyramidHalf, NEGaussianPyramidHalfFixture, framework::DatasetMode::NIGHTLY, small_gaussian_pyramid_levels) -{ - validate_gaussian_pyramid(_target, _reference, _border_mode); -} - -FIXTURE_DATA_TEST_CASE(RunLargeGaussianPyramidHalf, NEGaussianPyramidHalfFixture, framework::DatasetMode::NIGHTLY, large_gaussian_pyramid_levels) -{ - validate_gaussian_pyramid(_target, _reference, _border_mode); -} -TEST_SUITE_END() -TEST_SUITE_END() -TEST_SUITE_END() -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/NEON/HOGDescriptor.cpp b/tests/validation/NEON/HOGDescriptor.cpp deleted file mode 100644 index f6c7ed310b..0000000000 --- a/tests/validation/NEON/HOGDescriptor.cpp +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/HOG.h" -#include "arm_compute/runtime/NEON/functions/NEHOGDescriptor.h" -#include "arm_compute/runtime/Tensor.h" -#include "arm_compute/runtime/TensorAllocator.h" -#include "tests/NEON/Accessor.h" -#include "tests/PaddingCalculator.h" -#include "tests/datasets/BorderModeDataset.h" -#include "tests/datasets/HOGDescriptorDataset.h" -#include "tests/datasets/ShapeDatasets.h" -#include "tests/framework/Asserts.h" -#include "tests/framework/Macros.h" -#include "tests/framework/datasets/Datasets.h" -#include "tests/validation/Validation.h" -#include "tests/validation/fixtures/HOGDescriptorFixture.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace -{ -RelativeTolerance tolerance(0.1f); -constexpr float tolerance_number = 0.05f; -} // namespace - -TEST_SUITE(NEON) -TEST_SUITE(HOGDescriptor) - -// *INDENT-OFF* -// clang-format off -using NEHOGDescriptorFixture = HOGDescriptorValidationFixture; - -FIXTURE_DATA_TEST_CASE(RunSmall, NEHOGDescriptorFixture, framework::DatasetMode::NIGHTLY, - combine(combine( - datasets::SmallHOGDescriptorDataset(), - framework::dataset::make("Format", Format::U8)), - framework::dataset::make("BorderMode", {BorderMode::CONSTANT, BorderMode::REPLICATE}))) -{ - // Validate output - validate(Accessor(_target), _reference, tolerance, tolerance_number); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, NEHOGDescriptorFixture, framework::DatasetMode::NIGHTLY, - combine(combine( - datasets::LargeHOGDescriptorDataset(), - framework::dataset::make("Format", Format::U8)), - framework::dataset::make("BorderMode", {BorderMode::CONSTANT, BorderMode::REPLICATE}))) -{ - // Validate output - validate(Accessor(_target), _reference, tolerance, tolerance_number); -} -// clang-format on -// *INDENT-ON* - -TEST_SUITE_END() -TEST_SUITE_END() -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/NEON/HOGDetector.cpp b/tests/validation/NEON/HOGDetector.cpp deleted file mode 100644 index 5da80e23bb..0000000000 --- a/tests/validation/NEON/HOGDetector.cpp +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (c) 2018-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/Array.h" -#include "arm_compute/runtime/NEON/functions/NEHOGDescriptor.h" -#include "arm_compute/runtime/NEON/functions/NEHOGDetector.h" -#include "tests/NEON/Accessor.h" -#include "tests/NEON/ArrayAccessor.h" -#include "tests/NEON/HOGAccessor.h" -#include "tests/datasets/HOGDescriptorDataset.h" -#include "tests/framework/Asserts.h" -#include "tests/framework/Macros.h" -#include "tests/framework/datasets/Datasets.h" -#include "tests/validation/Validation.h" -#include "tests/validation/fixtures/HOGDetectorFixture.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace -{ -/* Set the tolerance (percentage) used when validating the score of detection window. - Note: High tolerance is required due to divergence between CL and Neon detection window scores. */ -RelativeTolerance tolerance(1.0f); - -/* Input dataset (values must be a multiple of the HOGInfo block_size) */ -const auto DetectionWindowStrideDataset = framework::dataset::make("DetectionWindowStride", { Size2D(8, 8), Size2D(16, 16) }); -} // namespace - -TEST_SUITE(NEON) -TEST_SUITE(HOGDetector) - -// *INDENT-OFF* -// clang-format off -using NEHOGDetectorFixture = HOGDetectorValidationFixture, - HOGAccessor, - NEHOGDetector, - uint8_t, - float>; - -FIXTURE_DATA_TEST_CASE(RunSmall, NEHOGDetectorFixture, framework::DatasetMode::NIGHTLY, - combine(combine(combine( - DetectionWindowStrideDataset, - datasets::SmallHOGDescriptorDataset()), - framework::dataset::make("Format", Format::U8)), - framework::dataset::make("BorderMode", {BorderMode::CONSTANT, BorderMode::REPLICATE}))) -{ - // Validate output - validate_detection_windows(_target.begin(), _target.end(), _reference.begin(), _reference.end(), tolerance); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, NEHOGDetectorFixture, framework::DatasetMode::NIGHTLY, - combine(combine(combine( - DetectionWindowStrideDataset, - datasets::LargeHOGDescriptorDataset()), - framework::dataset::make("Format", Format::U8)), - framework::dataset::make("BorderMode", {BorderMode::CONSTANT, BorderMode::REPLICATE}))) -{ - // Validate output - validate_detection_windows(_target.begin(), _target.end(), _reference.begin(), _reference.end(), tolerance); -} - -// clang-format on -// *INDENT-ON* - -TEST_SUITE_END() -TEST_SUITE_END() -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/NEON/HOGMultiDetection.cpp b/tests/validation/NEON/HOGMultiDetection.cpp deleted file mode 100644 index ba51e371ae..0000000000 --- a/tests/validation/NEON/HOGMultiDetection.cpp +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/MultiHOG.h" -#include "arm_compute/runtime/NEON/functions/NEHOGDescriptor.h" -#include "arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h" -#include "arm_compute/runtime/Tensor.h" -#include "tests/NEON/Accessor.h" -#include "tests/NEON/ArrayAccessor.h" -#include "tests/NEON/HOGAccessor.h" -#include "tests/datasets/HOGMultiDetectionDataset.h" -#include "tests/framework/Macros.h" -#include "tests/framework/datasets/Datasets.h" -#include "tests/validation/Validation.h" -#include "tests/validation/fixtures/HOGMultiDetectionFixture.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace -{ -/* Set the tolerance (percentage) used when validating the strength of detection window. */ -RelativeTolerance tolerance(1.0f); -} // namespace - -TEST_SUITE(NEON) -TEST_SUITE(HOGMultiDetection) - -// *INDENT-OFF* -// clang-format off -using NEHOGMultiDetectionFixture = HOGMultiDetectionValidationFixture, - ArrayAccessor, - HOGAccessor, - NEHOGMultiDetection, - uint8_t, - float>; - -FIXTURE_DATA_TEST_CASE(RunSmall, NEHOGMultiDetectionFixture, framework::DatasetMode::NIGHTLY, - combine(combine(combine( - datasets::SmallHOGMultiDetectionDataset(), - framework::dataset::make("Format", Format::U8)), - framework::dataset::make("BorderMode", {BorderMode::CONSTANT, BorderMode::REPLICATE})), - framework::dataset::make("NonMaximaSuppression", {false, true}))) -{ - // Validate output - validate_detection_windows(_target.begin(), _target.end(), _reference.begin(), _reference.end(), tolerance); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, NEHOGMultiDetectionFixture, framework::DatasetMode::NIGHTLY, - combine(combine(combine( - datasets::LargeHOGMultiDetectionDataset(), - framework::dataset::make("Format", Format::U8)), - framework::dataset::make("BorderMode", {BorderMode::CONSTANT, BorderMode::REPLICATE})), - framework::dataset::make("NonMaximaSuppression", {false, true}))) -{ - // Validate output - validate_detection_windows(_target.begin(), _target.end(), _reference.begin(), _reference.end(), tolerance); -} - -// clang-format on -// *INDENT-ON* - -TEST_SUITE_END() -TEST_SUITE_END() -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/NEON/HarrisCorners.cpp b/tests/validation/NEON/HarrisCorners.cpp deleted file mode 100644 index 4c05c777b6..0000000000 --- a/tests/validation/NEON/HarrisCorners.cpp +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/functions/NEHarrisCorners.h" -#include "arm_compute/runtime/Tensor.h" -#include "arm_compute/runtime/TensorAllocator.h" -#include "tests/NEON/Accessor.h" -#include "tests/NEON/ArrayAccessor.h" -#include "tests/PaddingCalculator.h" -#include "tests/datasets/BorderModeDataset.h" -#include "tests/datasets/ImageFileDatasets.h" -#include "tests/datasets/ShapeDatasets.h" -#include "tests/framework/Asserts.h" -#include "tests/framework/Macros.h" -#include "tests/framework/datasets/Datasets.h" -#include "tests/validation/Validation.h" -#include "tests/validation/fixtures/HarrisCornersFixture.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace -{ -/* Allowed percentage of keypoints missing for target */ -const float allowed_missing_percentage = 10.f; -/* Allowed percentage of keypoints mismatching between target and reference */ -const float allowed_mismatch_percentage = 10.f; - -const auto data = combine(framework::dataset::make("GradientSize", { 3, 5, 7 }), combine(framework::dataset::make("BlockSize", { 3, 5, 7 }), datasets::BorderModes())); -} // namespace - -TEST_SUITE(NEON) -TEST_SUITE(HarrisCorners) - -template -using NEHarrisCornersFixture = HarrisCornersValidationFixture; - -FIXTURE_DATA_TEST_CASE(RunSmall, NEHarrisCornersFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SmallImageFiles(), data), framework::dataset::make("Format", - Format::U8))) -{ - // Validate output - ArrayAccessor array(_target); - validate_keypoints(array.buffer(), - array.buffer() + array.num_values(), - _reference.begin(), - _reference.end(), - RelativeTolerance(0.0001f), - allowed_missing_percentage, - allowed_mismatch_percentage); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, NEHarrisCornersFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeImageFiles(), data), framework::dataset::make("Format", Format::U8))) -{ - // Validate output - ArrayAccessor array(_target); - validate_keypoints(array.buffer(), - array.buffer() + array.num_values(), - _reference.begin(), - _reference.end(), - RelativeTolerance(0.0001f), - allowed_missing_percentage, - allowed_mismatch_percentage); -} - -TEST_SUITE_END() -TEST_SUITE_END() -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/NEON/Histogram.cpp b/tests/validation/NEON/Histogram.cpp deleted file mode 100644 index 906880251b..0000000000 --- a/tests/validation/NEON/Histogram.cpp +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/Distribution1D.h" -#include "arm_compute/runtime/NEON/functions/NEHistogram.h" -#include "arm_compute/runtime/Tensor.h" -#include "arm_compute/runtime/TensorAllocator.h" -#include "tests/NEON/Accessor.h" -#include "tests/PaddingCalculator.h" -#include "tests/datasets/BorderModeDataset.h" -#include "tests/datasets/ShapeDatasets.h" -#include "tests/framework/Asserts.h" -#include "tests/framework/Macros.h" -#include "tests/framework/datasets/Datasets.h" -#include "tests/validation/Validation.h" -#include "tests/validation/fixtures/HistogramFixture.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -TEST_SUITE(NEON) -TEST_SUITE(Histogram) - -template -using NEHistogramFixture = HistogramValidationFixture; - -FIXTURE_DATA_TEST_CASE(RunSmall, NEHistogramFixture, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), framework::dataset::make("DataType", - DataType::U8))) -{ - // Validate output - validate(Accessor(_target), _reference); -} -FIXTURE_DATA_TEST_CASE(RunLarge, NEHistogramFixture, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), framework::dataset::make("DataType", - DataType::U8))) -{ - // Validate output - validate(Accessor(_target), _reference); -} - -TEST_SUITE_END() -TEST_SUITE_END() -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/NEON/IntegralImage.cpp b/tests/validation/NEON/IntegralImage.cpp deleted file mode 100644 index a29f7423c1..0000000000 --- a/tests/validation/NEON/IntegralImage.cpp +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NEIntegralImage.h" -#include "tests/NEON/Accessor.h" -#include "tests/PaddingCalculator.h" -#include "tests/datasets/ShapeDatasets.h" -#include "tests/framework/Macros.h" -#include "tests/validation/Validation.h" -#include "tests/validation/fixtures/IntegralImageFixture.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -TEST_SUITE(NEON) -TEST_SUITE(IntegralImage) - -template -using NEIntegralImageFixture = IntegralImageValidationFixture; - -FIXTURE_DATA_TEST_CASE(RunSmall, NEIntegralImageFixture, framework::DatasetMode::NIGHTLY, combine(datasets::SmallShapes(), framework::dataset::make("DataType", - DataType::U8))) -{ - // Validate output - validate(Accessor(_target), _reference); -} -FIXTURE_DATA_TEST_CASE(RunLarge, NEIntegralImageFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), framework::dataset::make("DataType", - DataType::U8))) -{ - // Validate output - validate(Accessor(_target), _reference); -} - -TEST_SUITE_END() -TEST_SUITE_END() -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/NEON/LaplacianPyramid.cpp b/tests/validation/NEON/LaplacianPyramid.cpp deleted file mode 100644 index b5215e0fa1..0000000000 --- a/tests/validation/NEON/LaplacianPyramid.cpp +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/functions/NELaplacianPyramid.h" -#include "arm_compute/runtime/Tensor.h" -#include "arm_compute/runtime/TensorAllocator.h" -#include "tests/NEON/Accessor.h" -#include "tests/datasets/BorderModeDataset.h" -#include "tests/datasets/ShapeDatasets.h" -#include "tests/framework/Asserts.h" -#include "tests/framework/Macros.h" -#include "tests/framework/datasets/Datasets.h" -#include "tests/validation/Validation.h" -#include "tests/validation/fixtures/LaplacianPyramidFixture.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace -{ -const auto small_laplacian_pyramid_levels = framework::dataset::make("NumLevels", 2, 3); -const auto large_laplacian_pyramid_levels = framework::dataset::make("NumLevels", 2, 5); - -const auto formats = combine(framework::dataset::make("FormatIn", Format::U8), framework::dataset::make("FormatOut", Format::S16)); - -template -inline void validate_laplacian_pyramid(const Pyramid &target, const std::vector> &reference, BorderMode border_mode) -{ - Tensor *level_image = target.get_pyramid_level(0); - ValidRegion valid_region = shape_to_valid_region(reference[0].shape(), border_mode == BorderMode::UNDEFINED, BorderSize(2)); - - // Validate lowest level - validate(Accessor(*level_image), reference[0], valid_region); - - // Validate remaining levels - for(size_t lev = 1; lev < target.info()->num_levels(); lev++) - { - level_image = target.get_pyramid_level(lev); - Tensor *prev_level_image = target.get_pyramid_level(lev - 1); - - valid_region = shape_to_valid_region_laplacian_pyramid(prev_level_image->info()->tensor_shape(), - prev_level_image->info()->valid_region(), - border_mode == BorderMode::UNDEFINED); - - // Validate level - validate(Accessor(*level_image), reference[lev], valid_region); - } -} -} // namespace - -TEST_SUITE(NEON) -TEST_SUITE(LaplacianPyramid) - -// *INDENT-OFF* -// clang-format off - -using NELaplacianPyramidFixture = LaplacianPyramidValidationFixture; - -FIXTURE_DATA_TEST_CASE(RunSmall, NELaplacianPyramidFixture, framework::DatasetMode::NIGHTLY, - combine(combine(combine( - datasets::Medium2DShapes(), - datasets::BorderModes()), - small_laplacian_pyramid_levels), - formats)) -{ - validate_laplacian_pyramid(_target, _reference, _border_mode); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, NELaplacianPyramidFixture, framework::DatasetMode::NIGHTLY, - combine(combine(combine( - datasets::Large2DShapes(), - datasets::BorderModes()), - large_laplacian_pyramid_levels), - formats)) -{ - validate_laplacian_pyramid(_target, _reference, _border_mode); -} -// clang-format on -// *INDENT-ON* - -TEST_SUITE_END() -TEST_SUITE_END() -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/NEON/LaplacianReconstruct.cpp b/tests/validation/NEON/LaplacianReconstruct.cpp deleted file mode 100644 index b679d365b4..0000000000 --- a/tests/validation/NEON/LaplacianReconstruct.cpp +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/functions/NELaplacianPyramid.h" -#include "arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h" -#include "arm_compute/runtime/Pyramid.h" -#include "arm_compute/runtime/Tensor.h" -#include "arm_compute/runtime/TensorAllocator.h" -#include "tests/NEON/Accessor.h" -#include "tests/datasets/BorderModeDataset.h" -#include "tests/datasets/ShapeDatasets.h" -#include "tests/framework/Asserts.h" -#include "tests/framework/Macros.h" -#include "tests/framework/datasets/Datasets.h" -#include "tests/validation/Validation.h" -#include "tests/validation/fixtures/LaplacianReconstructFixture.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace -{ -const auto small_laplacian_reconstruct_levels = framework::dataset::make("NumLevels", 2, 3); -const auto large_laplacian_reconstruct_levels = framework::dataset::make("NumLevels", 2, 5); - -const auto formats = combine(framework::dataset::make("FormatIn", Format::S16), framework::dataset::make("FormatOut", Format::U8)); - -template -void validate_laplacian_reconstruct(Tensor &target, const SimpleTensor &reference, BorderMode border_mode, size_t num_levels) -{ - const unsigned int filter_size = 5; - const unsigned int border_size(filter_size / 2); - - BorderSize border(std::pow(border_size, num_levels)); - - // Validate output - ValidRegion valid_region = shape_to_valid_region(reference.shape(), border_mode == BorderMode::UNDEFINED, border); - validate(Accessor(target), reference, valid_region); -} -} // namespace - -TEST_SUITE(NEON) -TEST_SUITE(LaplacianReconstruct) - -// *INDENT-OFF* -// clang-format off - -using NELaplacianReconstructFixture = LaplacianReconstructValidationFixture; - -FIXTURE_DATA_TEST_CASE(RunSmall, NELaplacianReconstructFixture, framework::DatasetMode::NIGHTLY, - combine(combine(combine( - datasets::Medium2DShapes(), - datasets::BorderModes()), - small_laplacian_reconstruct_levels), - formats)) -{ - validate_laplacian_reconstruct(_target, _reference, _border_mode, _pyramid_levels); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, NELaplacianReconstructFixture, framework::DatasetMode::NIGHTLY, - combine(combine(combine( - datasets::Large2DShapes(), - datasets::BorderModes()), - large_laplacian_reconstruct_levels), - formats)) -{ - validate_laplacian_reconstruct(_target, _reference, _border_mode, _pyramid_levels); -} -// clang-format on -// *INDENT-ON* - -TEST_SUITE_END() -TEST_SUITE_END() -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/NEON/Magnitude.cpp b/tests/validation/NEON/Magnitude.cpp deleted file mode 100644 index 76429af211..0000000000 --- a/tests/validation/NEON/Magnitude.cpp +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NEMagnitude.h" -#include "tests/NEON/Accessor.h" -#include "tests/PaddingCalculator.h" -#include "tests/datasets/ShapeDatasets.h" -#include "tests/framework/Asserts.h" -#include "tests/framework/Macros.h" -#include "tests/framework/datasets/Datasets.h" -#include "tests/validation/Validation.h" -#include "tests/validation/fixtures/MagnitudeFixture.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace -{ -template -AbsoluteTolerance tolerance(MagnitudeType magnitude_type) -{ - return AbsoluteTolerance((MagnitudeType::L1NORM == magnitude_type) ? 0 : 1); -} -} // namespace - -TEST_SUITE(NEON) -TEST_SUITE(Magnitude) - -template -using NEMagnitudeFixture = MagnitudeValidationFixture; - -TEST_SUITE(S16) -FIXTURE_DATA_TEST_CASE(RunSmall, NEMagnitudeFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::Small2DShapes(), framework::dataset::make("Format", Format::S16)), - framework::dataset::make("MagnitudeType", { MagnitudeType::L1NORM, MagnitudeType::L2NORM }))) - -{ - // Validate output - validate(Accessor(_target), _reference, tolerance(_magnitude_type)); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, NEMagnitudeFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::Large2DShapes(), framework::dataset::make("Format", Format::S16)), - framework::dataset::make("MagnitudeType", { MagnitudeType::L1NORM, MagnitudeType::L2NORM }))) -{ - // Validate output - validate(Accessor(_target), _reference, tolerance(_magnitude_type)); -} -TEST_SUITE_END() // S16 - -TEST_SUITE_END() -TEST_SUITE_END() -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/NEON/MeanStdDev.cpp b/tests/validation/NEON/MeanStdDev.cpp deleted file mode 100644 index 8c254b2633..0000000000 --- a/tests/validation/NEON/MeanStdDev.cpp +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NEMeanStdDev.h" -#include "tests/NEON/Accessor.h" -#include "tests/PaddingCalculator.h" -#include "tests/datasets/ShapeDatasets.h" -#include "tests/framework/Macros.h" -#include "tests/validation/Validation.h" -#include "tests/validation/fixtures/MeanStdDevFixture.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace -{ -RelativeTolerance tolerance_rel_high_error(0.05f); -RelativeTolerance tolerance_rel_low_error(0.0005f); -} // namespace - -TEST_SUITE(NEON) -TEST_SUITE(MeanStdDev) - -template -using NEMeanStdDevFixture = MeanStdDevValidationFixture; - -FIXTURE_DATA_TEST_CASE(RunSmall, NEMeanStdDevFixture, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), framework::dataset::make("DataType", - DataType::U8))) -{ - // Validate mean output - validate(_target.first, _reference.first); - - // Validate std_dev output - validate(_target.second, _reference.second, tolerance_rel_high_error); -} -FIXTURE_DATA_TEST_CASE(RunLarge, NEMeanStdDevFixture, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), framework::dataset::make("DataType", - DataType::U8))) -{ - // Validate mean output - validate(_target.first, _reference.first, tolerance_rel_low_error); - - // Validate std_dev output - validate(_target.second, _reference.second, tolerance_rel_high_error); -} - -TEST_SUITE_END() -TEST_SUITE_END() -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/NEON/Median3x3.cpp b/tests/validation/NEON/Median3x3.cpp deleted file mode 100644 index 0fac6511f6..0000000000 --- a/tests/validation/NEON/Median3x3.cpp +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/functions/NEMedian3x3.h" -#include "arm_compute/runtime/Tensor.h" -#include "arm_compute/runtime/TensorAllocator.h" -#include "tests/NEON/Accessor.h" -#include "tests/PaddingCalculator.h" -#include "tests/datasets/BorderModeDataset.h" -#include "tests/datasets/ShapeDatasets.h" -#include "tests/framework/Asserts.h" -#include "tests/framework/Macros.h" -#include "tests/framework/datasets/Datasets.h" -#include "tests/validation/Validation.h" -#include "tests/validation/fixtures/Median3x3Fixture.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace -{ -constexpr unsigned int filter_size = 3; /* Size of the kernel/filter in number of elements. */ -constexpr BorderSize border_size(filter_size / 2); /* Border size of the kernel/filter around its central element. */ -} // namespace - -TEST_SUITE(NEON) -TEST_SUITE(Median3x3) - -template -using NEMedian3x3Fixture = Median3x3ValidationFixture; - -FIXTURE_DATA_TEST_CASE(RunSmall, NEMedian3x3Fixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", - DataType::U8)), - datasets::BorderModes())) -{ - // Validate output - validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), border_size)); -} -FIXTURE_DATA_TEST_CASE(RunLarge, NEMedian3x3Fixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", - DataType::U8)), - datasets::BorderModes())) -{ - // Validate output - validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), border_size)); -} - -TEST_SUITE_END() -TEST_SUITE_END() -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/NEON/MinMaxLocation.cpp b/tests/validation/NEON/MinMaxLocation.cpp deleted file mode 100644 index 581dcb1fe2..0000000000 --- a/tests/validation/NEON/MinMaxLocation.cpp +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (c) 2017-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/NEON/functions/NEMinMaxLocation.h" -#include "tests/NEON/Accessor.h" -#include "tests/NEON/ArrayAccessor.h" -#include "tests/PaddingCalculator.h" -#include "tests/datasets/ShapeDatasets.h" -#include "tests/framework/Macros.h" -#include "tests/validation/Validation.h" -#include "tests/validation/fixtures/MinMaxLocationFixture.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -TEST_SUITE(NEON) -TEST_SUITE(MinMaxLocation) - -template -using NEMinMaxLocationFixture = MinMaxLocationValidationFixture, ArrayAccessor, NEMinMaxLocation, T>; - -TEST_SUITE(U8) - -FIXTURE_DATA_TEST_CASE(RunSmall, NEMinMaxLocationFixture, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), framework::dataset::make("DataType", - DataType::U8))) -{ - validate_min_max_loc(_target, _reference); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, NEMinMaxLocationFixture, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), framework::dataset::make("DataType", - DataType::U8))) -{ - validate_min_max_loc(_target, _reference); -} - -TEST_SUITE_END() // U8 - -TEST_SUITE(S16) - -FIXTURE_DATA_TEST_CASE(RunSmall, NEMinMaxLocationFixture, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), framework::dataset::make("DataType", - DataType::S16))) -{ - validate_min_max_loc(_target, _reference); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, NEMinMaxLocationFixture, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), framework::dataset::make("DataType", - DataType::S16))) -{ - validate_min_max_loc(_target, _reference); -} - -TEST_SUITE_END() // S16 - -TEST_SUITE(Float) - -FIXTURE_DATA_TEST_CASE(RunSmall, NEMinMaxLocationFixture, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), framework::dataset::make("DataType", - DataType::F32))) -{ - validate_min_max_loc(_target, _reference); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, NEMinMaxLocationFixture, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), framework::dataset::make("DataType", - DataType::F32))) -{ - validate_min_max_loc(_target, _reference); -} - -TEST_SUITE_END() // F32 - -TEST_SUITE_END() // MinMaxLocation -TEST_SUITE_END() // Neon -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/NEON/NonLinearFilter.cpp b/tests/validation/NEON/NonLinearFilter.cpp deleted file mode 100644 index 4b2ad2796a..0000000000 --- a/tests/validation/NEON/NonLinearFilter.cpp +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NENonLinearFilter.h" -#include "tests/NEON/Accessor.h" -#include "tests/PaddingCalculator.h" -#include "tests/datasets/BorderModeDataset.h" -#include "tests/datasets/MatrixPatternDataset.h" -#include "tests/datasets/NonLinearFilterFunctionDataset.h" -#include "tests/datasets/ShapeDatasets.h" -#include "tests/framework/Macros.h" -#include "tests/validation/Validation.h" -#include "tests/validation/fixtures/NonLinearFilterFixture.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -TEST_SUITE(NEON) -TEST_SUITE(NonLinearFilter) - -template -using NENonLinearFilterFixture = NonLinearFilterValidationFixture; - -FIXTURE_DATA_TEST_CASE(RunSmall, NENonLinearFilterFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(datasets::SmallShapes(), - datasets::NonLinearFilterFunctions()), - framework::dataset::make("MaskSize", { 3U, 5U })), - datasets::MatrixPatterns()), - datasets::BorderModes()), - framework::dataset::make("DataType", DataType::U8))) -{ - // Validate output - validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), _border_size)); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, NENonLinearFilterFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(datasets::LargeShapes(), - datasets::NonLinearFilterFunctions()), - framework::dataset::make("MaskSize", { 3U, 5U })), - datasets::MatrixPatterns()), - datasets::BorderModes()), - framework::dataset::make("DataType", DataType::U8))) -{ - // Validate output - validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), _border_size)); -} - -TEST_SUITE_END() -TEST_SUITE_END() -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/NEON/OpticalFlow.cpp b/tests/validation/NEON/OpticalFlow.cpp deleted file mode 100644 index f0bc47e804..0000000000 --- a/tests/validation/NEON/OpticalFlow.cpp +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/Array.h" -#include "arm_compute/runtime/NEON/functions/NEGaussianPyramid.h" -#include "arm_compute/runtime/NEON/functions/NEOpticalFlow.h" -#include "arm_compute/runtime/Pyramid.h" -#include "arm_compute/runtime/Tensor.h" -#include "tests/NEON/Accessor.h" -#include "tests/NEON/ArrayAccessor.h" -#include "tests/datasets/BorderModeDataset.h" -#include "tests/datasets/OpticalFlowDataset.h" -#include "tests/framework/Asserts.h" -#include "tests/framework/Macros.h" -#include "tests/framework/datasets/Datasets.h" -#include "tests/validation/Validation.h" -#include "tests/validation/fixtures/OpticalFlowFixture.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -TEST_SUITE(NEON) -TEST_SUITE(OpticalFlow) - -// *INDENT-OFF* -// clang-format off -using NEOpticalFlowFixture = OpticalFlowValidationFixture, - NEOpticalFlow, - Pyramid, - NEGaussianPyramidHalf, - uint8_t>; - -FIXTURE_DATA_TEST_CASE(RunSmall, NEOpticalFlowFixture, framework::DatasetMode::NIGHTLY, combine(combine( - datasets::SmallOpticalFlowDataset(), - framework::dataset::make("Format", Format::U8)), - datasets::BorderModes())) -{ - // Validate output - ArrayAccessor array(_target); - validate_keypoints(array.buffer(), - array.buffer() + array.num_values(), - _reference.begin(), - _reference.end()); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, NEOpticalFlowFixture, framework::DatasetMode::NIGHTLY, combine(combine( - datasets::LargeOpticalFlowDataset(), - framework::dataset::make("Format", Format::U8)), - datasets::BorderModes())) -{ - // Validate output - ArrayAccessor array(_target); - - validate_keypoints(array.buffer(), - array.buffer() + array.num_values(), - _reference.begin(), - _reference.end()); -} -// clang-format on -// *INDENT-ON* - -TEST_SUITE_END() -TEST_SUITE_END() -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/NEON/Phase.cpp b/tests/validation/NEON/Phase.cpp deleted file mode 100644 index 6d939b357c..0000000000 --- a/tests/validation/NEON/Phase.cpp +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NEPhase.h" -#include "tests/NEON/Accessor.h" -#include "tests/PaddingCalculator.h" -#include "tests/datasets/ShapeDatasets.h" -#include "tests/framework/Asserts.h" -#include "tests/framework/Macros.h" -#include "tests/framework/datasets/Datasets.h" -#include "tests/validation/Validation.h" -#include "tests/validation/fixtures/PhaseFixture.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace -{ -constexpr AbsoluteTolerance tolerance_value(1); -} // namespace - -TEST_SUITE(NEON) -TEST_SUITE(Phase) - -template -using NEPhaseFixture = PhaseValidationFixture; - -TEST_SUITE(S16) -FIXTURE_DATA_TEST_CASE(RunSmall, NEPhaseFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::Small2DShapes(), framework::dataset::make("Format", Format::S16)), - framework::dataset::make("PhaseType", { PhaseType::UNSIGNED, PhaseType::SIGNED }))) -{ - // Validate output - validate_wrap(Accessor(_target), _reference, tolerance_value, 0); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, NEPhaseFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::Large2DShapes(), framework::dataset::make("Format", Format::S16)), - framework::dataset::make("PhaseType", { PhaseType::UNSIGNED, PhaseType::SIGNED }))) -{ - // Validate output - validate_wrap(Accessor(_target), _reference, tolerance_value, 0); -} -TEST_SUITE_END() // S16 - -TEST_SUITE_END() -TEST_SUITE_END() -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/NEON/Remap.cpp b/tests/validation/NEON/Remap.cpp deleted file mode 100644 index fe622ff95f..0000000000 --- a/tests/validation/NEON/Remap.cpp +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/functions/NERemap.h" -#include "arm_compute/runtime/Tensor.h" -#include "arm_compute/runtime/TensorAllocator.h" -#include "tests/NEON/Accessor.h" -#include "tests/PaddingCalculator.h" -#include "tests/datasets/BorderModeDataset.h" -#include "tests/datasets/ShapeDatasets.h" -#include "tests/framework/Asserts.h" -#include "tests/framework/Macros.h" -#include "tests/framework/datasets/Datasets.h" -#include "tests/validation/Validation.h" -#include "tests/validation/fixtures/RemapFixture.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace -{ -constexpr AbsoluteTolerance tolerance_value(0); -constexpr float tolerance_number = 0.f; -} // namespace - -TEST_SUITE(NEON) -TEST_SUITE(Remap) - -template -using NERemapFixture = RemapValidationFixture; - -FIXTURE_DATA_TEST_CASE(RunSmall, NERemapFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })), - framework::dataset::make("DataType", - DataType::U8)), - framework::dataset::make("BorderModes", { BorderMode::UNDEFINED, BorderMode::CONSTANT }))) -{ - // Validate output - validate(Accessor(_target), _reference, _valid_mask, tolerance_value, tolerance_number); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, NERemapFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })), - framework::dataset::make("DataType", - DataType::U8)), - framework::dataset::make("BorderModes", { BorderMode::UNDEFINED, BorderMode::CONSTANT }))) -{ - // Validate output - validate(Accessor(_target), _reference, _valid_mask, tolerance_value, tolerance_number); -} -TEST_SUITE_END() -TEST_SUITE_END() -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/NEON/Schaar.cpp b/tests/validation/NEON/Schaar.cpp deleted file mode 100644 index 28c43ccf1c..0000000000 --- a/tests/validation/NEON/Schaar.cpp +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NEScharr3x3.h" -#include "tests/NEON/Accessor.h" -#include "tests/PaddingCalculator.h" -#include "tests/datasets/BorderModeDataset.h" -#include "tests/datasets/GradientDimensionDataset.h" -#include "tests/datasets/ShapeDatasets.h" -#include "tests/framework/Macros.h" -#include "tests/framework/datasets/Datasets.h" -#include "tests/validation/Validation.h" -#include "tests/validation/fixtures/ScharrFixture.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -TEST_SUITE(NEON) -TEST_SUITE(Scharr) - -TEST_SUITE(W3x3) -using NEScharr3x3Fixture = ScharrValidationFixture; - -FIXTURE_DATA_TEST_CASE(RunSmall, NEScharr3x3Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format", - Format::U8)), - datasets::GradientDimensions())) -{ - // Validate output - ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1)); - validate(Accessor(_target.first), _reference.first, valid_region_x); - - ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1)); - validate(Accessor(_target.second), _reference.second, valid_region_y); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, NEScharr3x3Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format", - Format::U8)), - datasets::GradientDimensions())) -{ - // Validate output - ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1)); - validate(Accessor(_target.first), _reference.first, valid_region_x); - - ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1)); - validate(Accessor(_target.second), _reference.second, valid_region_y); -} -TEST_SUITE_END() - -TEST_SUITE_END() -TEST_SUITE_END() -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/NEON/Sobel.cpp b/tests/validation/NEON/Sobel.cpp deleted file mode 100644 index 86d2c67704..0000000000 --- a/tests/validation/NEON/Sobel.cpp +++ /dev/null @@ -1,256 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/functions/NESobel3x3.h" -#include "arm_compute/runtime/NEON/functions/NESobel5x5.h" -#include "arm_compute/runtime/NEON/functions/NESobel7x7.h" -#include "arm_compute/runtime/Tensor.h" -#include "arm_compute/runtime/TensorAllocator.h" -#include "tests/NEON/Accessor.h" -#include "tests/PaddingCalculator.h" -#include "tests/datasets/BorderModeDataset.h" -#include "tests/datasets/ShapeDatasets.h" -#include "tests/framework/Asserts.h" -#include "tests/framework/Macros.h" -#include "tests/framework/datasets/Datasets.h" -#include "tests/validation/Validation.h" -#include "tests/validation/fixtures/SobelFixture.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -TEST_SUITE(NEON) -TEST_SUITE(Sobel) - -TEST_SUITE(W3x3) -using NESobel3x3Fixture = SobelValidationFixture; - -TEST_SUITE(X) -FIXTURE_DATA_TEST_CASE(RunSmall, NESobel3x3Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format", - Format::U8)), - framework::dataset::make("GradientDimension", GradientDimension::GRAD_X))) -{ - // Validate output - ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1)); - validate(Accessor(_target.first), _reference.first, valid_region_x); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, NESobel3x3Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format", - Format::U8)), - framework::dataset::make("GradientDimension", GradientDimension::GRAD_X))) -{ - // Validate output - ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1)); - validate(Accessor(_target.first), _reference.first, valid_region_x); -} -TEST_SUITE_END() -TEST_SUITE(Y) -FIXTURE_DATA_TEST_CASE(RunSmall, NESobel3x3Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format", - Format::U8)), - framework::dataset::make("GradientDimension", GradientDimension::GRAD_Y))) -{ - // Validate output - ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1)); - validate(Accessor(_target.second), _reference.second, valid_region_y); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, NESobel3x3Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format", - Format::U8)), - framework::dataset::make("GradientDimension", GradientDimension::GRAD_Y))) -{ - // Validate output - ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1)); - validate(Accessor(_target.second), _reference.second, valid_region_y); -} -TEST_SUITE_END() -TEST_SUITE(XY) -FIXTURE_DATA_TEST_CASE(RunSmall, NESobel3x3Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format", - Format::U8)), - framework::dataset::make("GradientDimension", GradientDimension::GRAD_XY))) -{ - // Validate output - ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1)); - validate(Accessor(_target.first), _reference.first, valid_region_x); - - ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1)); - validate(Accessor(_target.second), _reference.second, valid_region_y); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, NESobel3x3Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format", - Format::U8)), - framework::dataset::make("GradientDimension", GradientDimension::GRAD_XY))) -{ - // Validate output - ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1)); - validate(Accessor(_target.first), _reference.first, valid_region_x); - - ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1)); - validate(Accessor(_target.second), _reference.second, valid_region_y); -} -TEST_SUITE_END() -TEST_SUITE_END() - -TEST_SUITE(W5x5) -using NESobel5x5Fixture = SobelValidationFixture; - -TEST_SUITE(X) -FIXTURE_DATA_TEST_CASE(RunSmall, NESobel5x5Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format", - Format::U8)), - framework::dataset::make("GradientDimension", GradientDimension::GRAD_X))) -{ - // Validate output - ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(2)); - validate(Accessor(_target.first), _reference.first, valid_region_x); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, NESobel5x5Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format", - Format::U8)), - framework::dataset::make("GradientDimension", GradientDimension::GRAD_X))) -{ - // Validate output - ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(2)); - validate(Accessor(_target.first), _reference.first, valid_region_x); -} -TEST_SUITE_END() -TEST_SUITE(Y) -FIXTURE_DATA_TEST_CASE(RunSmall, NESobel5x5Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format", - Format::U8)), - framework::dataset::make("GradientDimension", GradientDimension::GRAD_Y))) -{ - // Validate output - ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(2)); - validate(Accessor(_target.second), _reference.second, valid_region_y); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, NESobel5x5Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format", - Format::U8)), - framework::dataset::make("GradientDimension", GradientDimension::GRAD_Y))) -{ - // Validate output - ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(2)); - validate(Accessor(_target.second), _reference.second, valid_region_y); -} -TEST_SUITE_END() -TEST_SUITE(XY) -FIXTURE_DATA_TEST_CASE(RunSmall, NESobel5x5Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format", - Format::U8)), - framework::dataset::make("GradientDimension", GradientDimension::GRAD_XY))) -{ - // Validate output - ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(2)); - validate(Accessor(_target.first), _reference.first, valid_region_x); - - ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(2)); - validate(Accessor(_target.second), _reference.second, valid_region_y); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, NESobel5x5Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format", - Format::U8)), - framework::dataset::make("GradientDimension", GradientDimension::GRAD_XY))) -{ - // Validate output - ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(2)); - validate(Accessor(_target.first), _reference.first, valid_region_x); - - ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(2)); - validate(Accessor(_target.second), _reference.second, valid_region_y); -} -TEST_SUITE_END() -TEST_SUITE_END() - -TEST_SUITE(W7x7) -using NESobel7x7Fixture = SobelValidationFixture; -TEST_SUITE(X) -FIXTURE_DATA_TEST_CASE(RunSmall, NESobel7x7Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format", - Format::U8)), - framework::dataset::make("GradientDimension", GradientDimension::GRAD_X))) -{ - // Validate output - ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(3)); - validate(Accessor(_target.first), _reference.first, valid_region_x); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, NESobel7x7Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format", - Format::U8)), - framework::dataset::make("GradientDimension", GradientDimension::GRAD_X))) -{ - // Validate output - ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(3)); - validate(Accessor(_target.first), _reference.first, valid_region_x); -} -TEST_SUITE_END() -TEST_SUITE(Y) -FIXTURE_DATA_TEST_CASE(RunSmall, NESobel7x7Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format", - Format::U8)), - framework::dataset::make("GradientDimension", GradientDimension::GRAD_Y))) -{ - // Validate output - ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(3)); - validate(Accessor(_target.second), _reference.second, valid_region_y); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, NESobel7x7Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format", - Format::U8)), - framework::dataset::make("GradientDimension", GradientDimension::GRAD_Y))) -{ - // Validate output - ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(3)); - validate(Accessor(_target.second), _reference.second, valid_region_y); -} -TEST_SUITE_END() -TEST_SUITE(XY) -FIXTURE_DATA_TEST_CASE(RunSmall, NESobel7x7Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format", - Format::U8)), - framework::dataset::make("GradientDimension", GradientDimension::GRAD_XY))) -{ - // Validate output - ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(3)); - validate(Accessor(_target.first), _reference.first, valid_region_x); - - ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(3)); - validate(Accessor(_target.second), _reference.second, valid_region_y); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, NESobel7x7Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format", - Format::U8)), - framework::dataset::make("GradientDimension", GradientDimension::GRAD_XY))) -{ - // Validate output - ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(3)); - validate(Accessor(_target.first), _reference.first, valid_region_x); - - ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(3)); - validate(Accessor(_target.second), _reference.second, valid_region_y); -} -TEST_SUITE_END() -TEST_SUITE_END() - -TEST_SUITE_END() -TEST_SUITE_END() -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/NEON/TableLookup.cpp b/tests/validation/NEON/TableLookup.cpp deleted file mode 100644 index 9543d827c4..0000000000 --- a/tests/validation/NEON/TableLookup.cpp +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/functions/NETableLookup.h" -#include "arm_compute/runtime/Tensor.h" -#include "arm_compute/runtime/TensorAllocator.h" - -#include "tests/NEON/Accessor.h" -#include "tests/NEON/LutAccessor.h" -#include "tests/PaddingCalculator.h" -#include "tests/datasets/ShapeDatasets.h" -#include "tests/framework/Asserts.h" -#include "tests/framework/Macros.h" -#include "tests/framework/datasets/Datasets.h" - -#include "tests/validation/Helpers.h" -#include "tests/validation/Validation.h" -#include "tests/validation/fixtures/TableLookupFixture.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -TEST_SUITE(NEON) -TEST_SUITE(TableLookup) - -template -using NETableLookupFixture = TableLookupValidationFixture, Lut, T>; -TEST_SUITE(U8) - -FIXTURE_DATA_TEST_CASE(RunSmallU8, NETableLookupFixture, framework::DatasetMode::NIGHTLY, combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::U8))) -{ - // Validate output - validate(Accessor(_target), _reference); -} -FIXTURE_DATA_TEST_CASE(RunLargeU8, NETableLookupFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::U8))) -{ - // Validate output - validate(Accessor(_target), _reference); -} -TEST_SUITE_END() - -TEST_SUITE(S16) -FIXTURE_DATA_TEST_CASE(RunSmallS16, NETableLookupFixture, framework::DatasetMode::NIGHTLY, combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::S16))) -{ - // Validate output - validate(Accessor(_target), _reference); -} -FIXTURE_DATA_TEST_CASE(RunLargeS16, NETableLookupFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::S16))) -{ - // Validate output - validate(Accessor(_target), _reference); -} -TEST_SUITE_END() - -TEST_SUITE_END() -TEST_SUITE_END() -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/NEON/Threshold.cpp b/tests/validation/NEON/Threshold.cpp deleted file mode 100644 index 0094a027f9..0000000000 --- a/tests/validation/NEON/Threshold.cpp +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NEThreshold.h" -#include "tests/NEON/Accessor.h" -#include "tests/PaddingCalculator.h" -#include "tests/datasets/ShapeDatasets.h" -#include "tests/datasets/ThresholdDataset.h" -#include "tests/framework/Asserts.h" -#include "tests/framework/Macros.h" -#include "tests/validation/Validation.h" -#include "tests/validation/fixtures/ThresholdFixture.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -TEST_SUITE(NEON) -TEST_SUITE(Threshold) - -template -using ThresholdFixture = ThresholdValidationFixture; - -FIXTURE_DATA_TEST_CASE(RunSmall, ThresholdFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SmallShapes(), datasets::MixedThresholdDataset()), - framework::dataset::make("DataType", - DataType::U8))) -{ - // Validate output - validate(Accessor(_target), _reference); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, ThresholdFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), datasets::MixedThresholdDataset()), - framework::dataset::make("DataType", - DataType::U8))) -{ - // Validate output - validate(Accessor(_target), _reference); -} - -TEST_SUITE_END() -TEST_SUITE_END() -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/NEON/WarpAffine.cpp b/tests/validation/NEON/WarpAffine.cpp deleted file mode 100644 index 91748580a4..0000000000 --- a/tests/validation/NEON/WarpAffine.cpp +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/functions/NEWarpAffine.h" -#include "arm_compute/runtime/Tensor.h" -#include "arm_compute/runtime/TensorAllocator.h" -#include "tests/AssetsLibrary.h" -#include "tests/NEON/Accessor.h" -#include "tests/PaddingCalculator.h" -#include "tests/datasets/BorderModeDataset.h" -#include "tests/datasets/InterpolationPolicyDataset.h" -#include "tests/datasets/ShapeDatasets.h" -#include "tests/framework/Asserts.h" -#include "tests/framework/Macros.h" -#include "tests/framework/datasets/Datasets.h" -#include "tests/validation/Validation.h" -#include "tests/validation/fixtures/WarpAffineFixture.h" -#include "tests/validation/reference/Utils.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace -{ -/** Tolerance */ -constexpr AbsoluteTolerance tolerance(1); -} // namespace - -TEST_SUITE(NEON) -TEST_SUITE(WarpAffine) - -template -using NEWarpAffineFixture = WarpAffineValidationFixture; - -FIXTURE_DATA_TEST_CASE(RunSmall, NEWarpAffineFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::U8)), - framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })), - datasets::BorderModes())) -{ - // Validate output - validate(Accessor(_target), _reference, _valid_mask, tolerance, 0.02f); -} -DISABLED_FIXTURE_DATA_TEST_CASE(RunLarge, NEWarpAffineFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", - DataType::U8)), - framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })), - datasets::BorderModes())) -{ - // Validate output - validate(Accessor(_target), _reference, _valid_mask, tolerance, 0.02f); -} - -TEST_SUITE_END() -TEST_SUITE_END() -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/NEON/WarpPerspective.cpp b/tests/validation/NEON/WarpPerspective.cpp deleted file mode 100644 index 28f206d4d1..0000000000 --- a/tests/validation/NEON/WarpPerspective.cpp +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/functions/NEWarpPerspective.h" -#include "arm_compute/runtime/Tensor.h" -#include "arm_compute/runtime/TensorAllocator.h" -#include "tests/NEON/Accessor.h" -#include "tests/PaddingCalculator.h" -#include "tests/datasets/BorderModeDataset.h" -#include "tests/datasets/ShapeDatasets.h" -#include "tests/framework/Asserts.h" -#include "tests/framework/Macros.h" -#include "tests/framework/datasets/Datasets.h" -#include "tests/validation/Validation.h" -#include "tests/validation/fixtures/WarpPerspectiveFixture.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace -{ -constexpr AbsoluteTolerance tolerance_value(1); -constexpr float tolerance_number = 0.2f; -} // namespace - -TEST_SUITE(NEON) -TEST_SUITE(WarpPerspective) - -template -using NEWarpPerspectiveFixture = WarpPerspectiveValidationFixture; - -FIXTURE_DATA_TEST_CASE(RunSmall, NEWarpPerspectiveFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", - DataType::U8)), - framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })), - datasets::BorderModes())) -{ - validate(Accessor(_target), _reference, _valid_mask, tolerance_value, tolerance_number); -} -FIXTURE_DATA_TEST_CASE(RunLarge, NEWarpPerspectiveFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", - DataType::U8)), - framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })), - datasets::BorderModes())) -{ - validate(Accessor(_target), _reference, _valid_mask, tolerance_value, tolerance_number); -} - -TEST_SUITE_END() -TEST_SUITE_END() -} // namespace validation -} // namespace test -} // namespace arm_compute -- cgit v1.2.1