aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichalis Spyrou <michalis.spyrou@arm.com>2021-02-23 11:48:12 +0000
committerMichalis Spyrou <michalis.spyrou@arm.com>2021-03-03 15:04:20 +0000
commit473cb01e84cef6cab057e9492bfa3b68f708e5d7 (patch)
treea500b8a8afe6a0442e1a54fb8d52c77d22543bcb
parentf466d75f85938b96dd14675ec091193bdce12122 (diff)
downloadComputeLibrary-473cb01e84cef6cab057e9492bfa3b68f708e5d7.tar.gz
Remove Compute Vision CL support
Resolves COMPMID-4151 Change-Id: I46f541efe8c4087f27794d2e158b6c1547d459ba Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5160 Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
-rw-r--r--Android.bp81
-rw-r--r--arm_compute/runtime/CL/CLFunctions.h41
-rw-r--r--arm_compute/runtime/CL/functions/CLAbsoluteDifference.h62
-rw-r--r--arm_compute/runtime/CL/functions/CLAccumulate.h109
-rw-r--r--arm_compute/runtime/CL/functions/CLBox3x3.h67
-rw-r--r--arm_compute/runtime/CL/functions/CLCannyEdge.h114
-rw-r--r--arm_compute/runtime/CL/functions/CLChannelCombine.h82
-rw-r--r--arm_compute/runtime/CL/functions/CLChannelExtract.h77
-rw-r--r--arm_compute/runtime/CL/functions/CLColorConvert.h104
-rw-r--r--arm_compute/runtime/CL/functions/CLConvolution.h190
-rw-r--r--arm_compute/runtime/CL/functions/CLDerivative.h75
-rw-r--r--arm_compute/runtime/CL/functions/CLDilate.h67
-rw-r--r--arm_compute/runtime/CL/functions/CLEqualizeHistogram.h90
-rw-r--r--arm_compute/runtime/CL/functions/CLErode.h67
-rw-r--r--arm_compute/runtime/CL/functions/CLFastCorners.h110
-rw-r--r--arm_compute/runtime/CL/functions/CLGaussian3x3.h67
-rw-r--r--arm_compute/runtime/CL/functions/CLGaussian5x5.h100
-rw-r--r--arm_compute/runtime/CL/functions/CLGaussianPyramid.h143
-rw-r--r--arm_compute/runtime/CL/functions/CLHOGDescriptor.h97
-rw-r--r--arm_compute/runtime/CL/functions/CLHOGDetector.h103
-rw-r--r--arm_compute/runtime/CL/functions/CLHOGGradient.h91
-rw-r--r--arm_compute/runtime/CL/functions/CLHOGMultiDetection.h138
-rw-r--r--arm_compute/runtime/CL/functions/CLHarrisCorners.h129
-rw-r--r--arm_compute/runtime/CL/functions/CLHistogram.h76
-rw-r--r--arm_compute/runtime/CL/functions/CLIntegralImage.h79
-rw-r--r--arm_compute/runtime/CL/functions/CLLaplacianPyramid.h101
-rw-r--r--arm_compute/runtime/CL/functions/CLLaplacianReconstruct.h110
-rw-r--r--arm_compute/runtime/CL/functions/CLMagnitude.h62
-rw-r--r--arm_compute/runtime/CL/functions/CLMeanStdDev.h107
-rw-r--r--arm_compute/runtime/CL/functions/CLMedian3x3.h67
-rw-r--r--arm_compute/runtime/CL/functions/CLMinMaxLocation.h111
-rw-r--r--arm_compute/runtime/CL/functions/CLNonLinearFilter.h79
-rw-r--r--arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h71
-rw-r--r--arm_compute/runtime/CL/functions/CLOpticalFlow.h146
-rw-r--r--arm_compute/runtime/CL/functions/CLPhase.h62
-rw-r--r--arm_compute/runtime/CL/functions/CLScharr3x3.h73
-rw-r--r--arm_compute/runtime/CL/functions/CLSobel3x3.h81
-rw-r--r--arm_compute/runtime/CL/functions/CLSobel5x5.h103
-rw-r--r--arm_compute/runtime/CL/functions/CLSobel7x7.h103
-rw-r--r--arm_compute/runtime/CL/functions/CLTableLookup.h56
-rw-r--r--arm_compute/runtime/CL/functions/CLThreshold.h64
-rw-r--r--arm_compute/runtime/CL/functions/CLWarpAffine.h71
-rw-r--r--arm_compute/runtime/CL/functions/CLWarpPerspective.h69
-rw-r--r--arm_compute/runtime/NEON/NEFunctions.h3
-rw-r--r--arm_compute/runtime/NEON/functions/NEConvolution.h178
-rw-r--r--arm_compute/runtime/NEON/functions/NERemap.h (renamed from arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h)33
-rw-r--r--docs/00_introduction.dox105
-rw-r--r--docs/01_library.dox7
-rw-r--r--docs/06_functions_list.dox52
-rw-r--r--examples/cl_convolution.cpp137
-rw-r--r--examples/cl_events.cpp132
-rw-r--r--examples/neon_convolution.cpp130
-rw-r--r--src/core/CL/CLKernelLibrary.cpp249
-rw-r--r--src/core/CL/CLKernels.h34
-rw-r--r--src/core/CL/cl_kernels/absdiff.cl65
-rw-r--r--src/core/CL/cl_kernels/accumulate.cl130
-rw-r--r--src/core/CL/cl_kernels/canny.cl454
-rw-r--r--src/core/CL/cl_kernels/channel_combine.cl416
-rw-r--r--src/core/CL/cl_kernels/channel_extract.cl272
-rw-r--r--src/core/CL/cl_kernels/color_convert.cl1911
-rw-r--r--src/core/CL/cl_kernels/convolution3x3.cl137
-rw-r--r--src/core/CL/cl_kernels/convolution5x5.cl287
-rw-r--r--src/core/CL/cl_kernels/convolution7x7.cl338
-rw-r--r--src/core/CL/cl_kernels/convolution9x9.cl403
-rw-r--r--src/core/CL/cl_kernels/convolution_rectangle.cl118
-rw-r--r--src/core/CL/cl_kernels/derivative.cl80
-rw-r--r--src/core/CL/cl_kernels/dilate.cl56
-rw-r--r--src/core/CL/cl_kernels/erode.cl56
-rw-r--r--src/core/CL/cl_kernels/fast_corners.cl262
-rw-r--r--src/core/CL/cl_kernels/gaussian_pyramid.cl113
-rw-r--r--src/core/CL/cl_kernels/harris_corners.cl376
-rw-r--r--src/core/CL/cl_kernels/histogram.cl243
-rw-r--r--src/core/CL/cl_kernels/hog.cl456
-rw-r--r--src/core/CL/cl_kernels/integral_image.cl100
-rw-r--r--src/core/CL/cl_kernels/magnitude_phase.cl162
-rw-r--r--src/core/CL/cl_kernels/mean_stddev.cl82
-rw-r--r--src/core/CL/cl_kernels/minmaxloc.cl193
-rw-r--r--src/core/CL/cl_kernels/non_linear_filter3x3.cl186
-rw-r--r--src/core/CL/cl_kernels/non_linear_filter5x5.cl483
-rw-r--r--src/core/CL/cl_kernels/non_linear_filter_helpers.h145
-rw-r--r--src/core/CL/cl_kernels/optical_flow_pyramid_lk.cl521
-rw-r--r--src/core/CL/cl_kernels/scharr_filter.cl124
-rw-r--r--src/core/CL/cl_kernels/tablelookup.cl114
-rw-r--r--src/core/CL/cl_kernels/threshold.cl104
-rw-r--r--src/core/CL/cl_kernels/warp_affine.cl120
-rw-r--r--src/core/CL/cl_kernels/warp_perspective.cl128
-rw-r--r--src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp104
-rw-r--r--src/core/CL/kernels/CLAbsoluteDifferenceKernel.h79
-rw-r--r--src/core/CL/kernels/CLAccumulateKernel.cpp101
-rw-r--r--src/core/CL/kernels/CLAccumulateKernel.h114
-rw-r--r--src/core/CL/kernels/CLBox3x3Kernel.cpp81
-rw-r--r--src/core/CL/kernels/CLBox3x3Kernel.h59
-rw-r--r--src/core/CL/kernels/CLCannyEdgeKernel.cpp310
-rw-r--r--src/core/CL/kernels/CLCannyEdgeKernel.h185
-rw-r--r--src/core/CL/kernels/CLChannelCombineKernel.cpp296
-rw-r--r--src/core/CL/kernels/CLChannelCombineKernel.h102
-rw-r--r--src/core/CL/kernels/CLChannelExtractKernel.cpp196
-rw-r--r--src/core/CL/kernels/CLChannelExtractKernel.h95
-rw-r--r--src/core/CL/kernels/CLColorConvertKernel.cpp558
-rw-r--r--src/core/CL/kernels/CLColorConvertKernel.h121
-rw-r--r--src/core/CL/kernels/CLConvolutionKernel.cpp392
-rw-r--r--src/core/CL/kernels/CLConvolutionKernel.h224
-rw-r--r--src/core/CL/kernels/CLDerivativeKernel.cpp155
-rw-r--r--src/core/CL/kernels/CLDerivativeKernel.h83
-rw-r--r--src/core/CL/kernels/CLDilateKernel.cpp70
-rw-r--r--src/core/CL/kernels/CLDilateKernel.h59
-rw-r--r--src/core/CL/kernels/CLErodeKernel.cpp70
-rw-r--r--src/core/CL/kernels/CLErodeKernel.h59
-rw-r--r--src/core/CL/kernels/CLFastCornersKernel.cpp209
-rw-r--r--src/core/CL/kernels/CLFastCornersKernel.h133
-rw-r--r--src/core/CL/kernels/CLGaussian3x3Kernel.cpp81
-rw-r--r--src/core/CL/kernels/CLGaussian3x3Kernel.h59
-rw-r--r--src/core/CL/kernels/CLGaussian5x5Kernel.cpp55
-rw-r--r--src/core/CL/kernels/CLGaussian5x5Kernel.h83
-rw-r--r--src/core/CL/kernels/CLGaussianPyramidKernel.cpp247
-rw-r--r--src/core/CL/kernels/CLGaussianPyramidKernel.h111
-rw-r--r--src/core/CL/kernels/CLHOGDescriptorKernel.cpp237
-rw-r--r--src/core/CL/kernels/CLHOGDescriptorKernel.h122
-rw-r--r--src/core/CL/kernels/CLHOGDetectorKernel.cpp146
-rw-r--r--src/core/CL/kernels/CLHOGDetectorKernel.h96
-rw-r--r--src/core/CL/kernels/CLHarrisCornersKernel.cpp149
-rw-r--r--src/core/CL/kernels/CLHarrisCornersKernel.h100
-rw-r--r--src/core/CL/kernels/CLHistogramKernel.cpp253
-rw-r--r--src/core/CL/kernels/CLHistogramKernel.h111
-rw-r--r--src/core/CL/kernels/CLIntegralImageKernel.cpp146
-rw-r--r--src/core/CL/kernels/CLIntegralImageKernel.h86
-rw-r--r--src/core/CL/kernels/CLMagnitudePhaseKernel.cpp176
-rw-r--r--src/core/CL/kernels/CLMagnitudePhaseKernel.h90
-rw-r--r--src/core/CL/kernels/CLMeanStdDevKernel.cpp156
-rw-r--r--src/core/CL/kernels/CLMeanStdDevKernel.h98
-rw-r--r--src/core/CL/kernels/CLMedian3x3Kernel.cpp88
-rw-r--r--src/core/CL/kernels/CLMedian3x3Kernel.h59
-rw-r--r--src/core/CL/kernels/CLMinMaxLocationKernel.cpp246
-rw-r--r--src/core/CL/kernels/CLMinMaxLocationKernel.h124
-rw-r--r--src/core/CL/kernels/CLNonLinearFilterKernel.cpp104
-rw-r--r--src/core/CL/kernels/CLNonLinearFilterKernel.h77
-rw-r--r--src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp78
-rw-r--r--src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h60
-rw-r--r--src/core/CL/kernels/CLScharr3x3Kernel.cpp127
-rw-r--r--src/core/CL/kernels/CLScharr3x3Kernel.h97
-rw-r--r--src/core/CL/kernels/CLSobel3x3Kernel.cpp141
-rw-r--r--src/core/CL/kernels/CLSobel3x3Kernel.h83
-rw-r--r--src/core/CL/kernels/CLSobel5x5Kernel.cpp251
-rw-r--r--src/core/CL/kernels/CLSobel5x5Kernel.h139
-rw-r--r--src/core/CL/kernels/CLSobel7x7Kernel.cpp255
-rw-r--r--src/core/CL/kernels/CLSobel7x7Kernel.h139
-rw-r--r--src/core/CL/kernels/CLTableLookupKernel.cpp68
-rw-r--r--src/core/CL/kernels/CLTableLookupKernel.h55
-rw-r--r--src/core/CL/kernels/CLThresholdKernel.cpp81
-rw-r--r--src/core/CL/kernels/CLThresholdKernel.h57
-rw-r--r--src/core/CL/kernels/CLWarpAffineKernel.cpp133
-rw-r--r--src/core/CL/kernels/CLWarpAffineKernel.h62
-rw-r--r--src/core/CL/kernels/CLWarpPerspectiveKernel.cpp105
-rw-r--r--src/core/CL/kernels/CLWarpPerspectiveKernel.h59
-rw-r--r--src/core/NEON/NEKernels.h3
-rw-r--r--src/core/NEON/kernels/NEConvolutionKernel.cpp1625
-rw-r--r--src/core/NEON/kernels/NEConvolutionKernel.h299
-rw-r--r--src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp516
-rw-r--r--src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h107
-rw-r--r--src/core/NEON/kernels/NERemapKernel.cpp237
-rw-r--r--src/core/NEON/kernels/NERemapKernel.h83
-rw-r--r--src/runtime/CL/functions/CLAbsoluteDifference.cpp42
-rw-r--r--src/runtime/CL/functions/CLAccumulate.cpp66
-rw-r--r--src/runtime/CL/functions/CLBox3x3.cpp45
-rw-r--r--src/runtime/CL/functions/CLCannyEdge.cpp214
-rw-r--r--src/runtime/CL/functions/CLChannelCombine.cpp54
-rw-r--r--src/runtime/CL/functions/CLChannelExtract.cpp54
-rw-r--r--src/runtime/CL/functions/CLColorConvert.cpp78
-rw-r--r--src/runtime/CL/functions/CLConvolution.cpp144
-rw-r--r--src/runtime/CL/functions/CLDerivative.cpp45
-rw-r--r--src/runtime/CL/functions/CLDilate.cpp45
-rw-r--r--src/runtime/CL/functions/CLEqualizeHistogram.cpp124
-rw-r--r--src/runtime/CL/functions/CLErode.cpp45
-rw-r--r--src/runtime/CL/functions/CLFastCorners.cpp141
-rw-r--r--src/runtime/CL/functions/CLGaussian3x3.cpp45
-rw-r--r--src/runtime/CL/functions/CLGaussian5x5.cpp81
-rw-r--r--src/runtime/CL/functions/CLGaussianPyramid.cpp214
-rw-r--r--src/runtime/CL/functions/CLHOGDescriptor.cpp124
-rw-r--r--src/runtime/CL/functions/CLHOGDetector.cpp77
-rw-r--r--src/runtime/CL/functions/CLHOGGradient.cpp93
-rw-r--r--src/runtime/CL/functions/CLHOGMultiDetection.cpp282
-rw-r--r--src/runtime/CL/functions/CLHarrisCorners.cpp198
-rw-r--r--src/runtime/CL/functions/CLHistogram.cpp50
-rw-r--r--src/runtime/CL/functions/CLIntegralImage.cpp54
-rw-r--r--src/runtime/CL/functions/CLLaplacianPyramid.cpp112
-rw-r--r--src/runtime/CL/functions/CLLaplacianReconstruct.cpp108
-rw-r--r--src/runtime/CL/functions/CLMagnitude.cpp42
-rw-r--r--src/runtime/CL/functions/CLMeanStdDev.cpp177
-rw-r--r--src/runtime/CL/functions/CLMedian3x3.cpp45
-rw-r--r--src/runtime/CL/functions/CLMinMaxLocation.cpp108
-rw-r--r--src/runtime/CL/functions/CLNonLinearFilter.cpp46
-rw-r--r--src/runtime/CL/functions/CLNonMaximaSuppression3x3.cpp52
-rw-r--r--src/runtime/CL/functions/CLOpticalFlow.cpp184
-rw-r--r--src/runtime/CL/functions/CLPhase.cpp42
-rw-r--r--src/runtime/CL/functions/CLScharr3x3.cpp45
-rw-r--r--src/runtime/CL/functions/CLSobel3x3.cpp47
-rw-r--r--src/runtime/CL/functions/CLSobel5x5.cpp101
-rw-r--r--src/runtime/CL/functions/CLSobel7x7.cpp101
-rw-r--r--src/runtime/CL/functions/CLTableLookup.cpp42
-rw-r--r--src/runtime/CL/functions/CLThreshold.cpp43
-rw-r--r--src/runtime/CL/functions/CLWarpAffine.cpp46
-rw-r--r--src/runtime/CL/functions/CLWarpPerspective.cpp46
-rw-r--r--src/runtime/NEON/functions/NEConvolution.cpp149
-rw-r--r--src/runtime/NEON/functions/NERemap.cpp (renamed from src/runtime/NEON/functions/NENonMaximaSuppression3x3.cpp)35
-rw-r--r--tests/validation/CL/AbsoluteDifference.cpp91
-rw-r--r--tests/validation/CL/Accumulate.cpp116
-rw-r--r--tests/validation/CL/Box3x3.cpp75
-rw-r--r--tests/validation/CL/CannyEdge.cpp78
-rw-r--r--tests/validation/CL/ChannelCombine.cpp113
-rw-r--r--tests/validation/CL/ChannelExtract.cpp106
-rw-r--r--tests/validation/CL/ColorConvert.cpp241
-rw-r--r--tests/validation/CL/Convolution.cpp285
-rw-r--r--tests/validation/CL/Derivative.cpp75
-rw-r--r--tests/validation/CL/Dilate.cpp75
-rw-r--r--tests/validation/CL/EqualizeHistogram.cpp63
-rw-r--r--tests/validation/CL/Erode.cpp75
-rw-r--r--tests/validation/CL/FastCorners.cpp78
-rw-r--r--tests/validation/CL/Gaussian3x3.cpp75
-rw-r--r--tests/validation/CL/Gaussian5x5.cpp75
-rw-r--r--tests/validation/CL/GaussianPyramid.cpp89
-rw-r--r--tests/validation/CL/HOGDescriptor.cpp83
-rw-r--r--tests/validation/CL/HOGDetector.cpp98
-rw-r--r--tests/validation/CL/HOGMultiDetection.cpp97
-rw-r--r--tests/validation/CL/HarrisCorners.cpp79
-rw-r--r--tests/validation/CL/Histogram.cpp68
-rw-r--r--tests/validation/CL/IntegralImage.cpp60
-rw-r--r--tests/validation/CL/LaplacianPyramid.cpp114
-rw-r--r--tests/validation/CL/LaplacianReconstruct.cpp100
-rw-r--r--tests/validation/CL/Magnitude.cpp91
-rw-r--r--tests/validation/CL/MeanStdDev.cpp127
-rw-r--r--tests/validation/CL/Median3x3.cpp74
-rw-r--r--tests/validation/CL/MinMaxLocation.cpp94
-rw-r--r--tests/validation/CL/NonLinearFilter.cpp73
-rw-r--r--tests/validation/CL/OpticalFlow.cpp93
-rw-r--r--tests/validation/CL/Phase.cpp87
-rw-r--r--tests/validation/CL/Scharr.cpp76
-rw-r--r--tests/validation/CL/Sobel.cpp259
-rw-r--r--tests/validation/CL/TableLookup.cpp82
-rw-r--r--tests/validation/CL/Threshold.cpp65
-rw-r--r--tests/validation/CL/WarpAffine.cpp79
-rw-r--r--tests/validation/NEON/Convolution.cpp295
-rw-r--r--tests/validation/NEON/Remap.cpp (renamed from tests/validation/CL/WarpPerspective.cpp)42
-rw-r--r--tests/validation/fixtures/AbsoluteDifferenceFixture.h114
-rw-r--r--tests/validation/fixtures/AccumulateFixture.h193
-rw-r--r--tests/validation/fixtures/Box3x3Fixture.h117
-rw-r--r--tests/validation/fixtures/CannyEdgeFixture.h121
-rw-r--r--tests/validation/fixtures/ChannelCombineFixture.h266
-rw-r--r--tests/validation/fixtures/ChannelExtractFixture.h192
-rw-r--r--tests/validation/fixtures/ColorConvertFixture.h218
-rw-r--r--tests/validation/fixtures/ConvolutionFixture.h235
-rw-r--r--tests/validation/fixtures/DerivativeFixture.h142
-rw-r--r--tests/validation/fixtures/DilateFixture.h117
-rw-r--r--tests/validation/fixtures/EqualizeHistogramFixture.h106
-rw-r--r--tests/validation/fixtures/ErodeFixture.h117
-rw-r--r--tests/validation/fixtures/FastCornersFixture.h138
-rw-r--r--tests/validation/fixtures/Gaussian3x3Fixture.h117
-rw-r--r--tests/validation/fixtures/Gaussian5x5Fixture.h117
-rw-r--r--tests/validation/fixtures/GaussianPyramidHalfFixture.h125
-rw-r--r--tests/validation/fixtures/HOGDescriptorFixture.h135
-rw-r--r--tests/validation/fixtures/HOGDetectorFixture.h138
-rw-r--r--tests/validation/fixtures/HOGMultiDetectionFixture.h193
-rw-r--r--tests/validation/fixtures/HarrisCornersFixture.h115
-rw-r--r--tests/validation/fixtures/HistogramFixture.h129
-rw-r--r--tests/validation/fixtures/IntegralImageFixture.h106
-rw-r--r--tests/validation/fixtures/LaplacianPyramidFixture.h154
-rw-r--r--tests/validation/fixtures/LaplacianReconstructFixture.h104
-rw-r--r--tests/validation/fixtures/MagnitudeFixture.h123
-rw-r--r--tests/validation/fixtures/MeanStdDevFixture.h115
-rw-r--r--tests/validation/fixtures/Median3x3Fixture.h117
-rw-r--r--tests/validation/fixtures/MinMaxLocationFixture.h122
-rw-r--r--tests/validation/fixtures/NonLinearFilterFixture.h119
-rw-r--r--tests/validation/fixtures/OpticalFlowFixture.h186
-rw-r--r--tests/validation/fixtures/PhaseFixture.h122
-rw-r--r--tests/validation/fixtures/SobelFixture.h192
-rw-r--r--tests/validation/fixtures/TableLookupFixture.h122
-rw-r--r--tests/validation/fixtures/ThresholdFixture.h107
-rw-r--r--tests/validation/fixtures/WarpAffineFixture.h121
-rw-r--r--tests/validation/fixtures/WarpPerspectiveFixture.h134
-rw-r--r--tests/validation/reference/Convolution.cpp68
-rw-r--r--tests/validation/reference/Convolution.h45
280 files changed, 460 insertions, 38179 deletions
diff --git a/Android.bp b/Android.bp
index c5980c3170..a51b91bee0 100644
--- a/Android.bp
+++ b/Android.bp
@@ -82,23 +82,15 @@ cc_library_static {
"src/core/CL/gemm/reshaped/CLGEMMDefaultConfigReshapedValhall.cpp",
"src/core/CL/gemm/reshaped_only_rhs/CLGEMMDefaultConfigReshapedRHSOnlyBifrost.cpp",
"src/core/CL/gemm/reshaped_only_rhs/CLGEMMDefaultConfigReshapedRHSOnlyValhall.cpp",
- "src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp",
- "src/core/CL/kernels/CLAccumulateKernel.cpp",
"src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp",
"src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp",
"src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp",
"src/core/CL/kernels/CLBitwiseKernel.cpp",
"src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp",
- "src/core/CL/kernels/CLBox3x3Kernel.cpp",
- "src/core/CL/kernels/CLCannyEdgeKernel.cpp",
- "src/core/CL/kernels/CLChannelCombineKernel.cpp",
- "src/core/CL/kernels/CLChannelExtractKernel.cpp",
"src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp",
"src/core/CL/kernels/CLCol2ImKernel.cpp",
- "src/core/CL/kernels/CLColorConvertKernel.cpp",
"src/core/CL/kernels/CLComparisonKernel.cpp",
"src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp",
- "src/core/CL/kernels/CLConvolutionKernel.cpp",
"src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp",
"src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.cpp",
"src/core/CL/kernels/CLDepthConvertLayerKernel.cpp",
@@ -108,14 +100,10 @@ cc_library_static {
"src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp",
"src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.cpp",
"src/core/CL/kernels/CLDequantizationLayerKernel.cpp",
- "src/core/CL/kernels/CLDerivativeKernel.cpp",
- "src/core/CL/kernels/CLDilateKernel.cpp",
"src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp",
- "src/core/CL/kernels/CLErodeKernel.cpp",
"src/core/CL/kernels/CLFFTDigitReverseKernel.cpp",
"src/core/CL/kernels/CLFFTRadixStageKernel.cpp",
"src/core/CL/kernels/CLFFTScaleKernel.cpp",
- "src/core/CL/kernels/CLFastCornersKernel.cpp",
"src/core/CL/kernels/CLFillBorderKernel.cpp",
"src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp",
"src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.cpp",
@@ -134,28 +122,14 @@ cc_library_static {
"src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.cpp",
"src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.cpp",
"src/core/CL/kernels/CLGatherKernel.cpp",
- "src/core/CL/kernels/CLGaussian3x3Kernel.cpp",
- "src/core/CL/kernels/CLGaussian5x5Kernel.cpp",
- "src/core/CL/kernels/CLGaussianPyramidKernel.cpp",
"src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp",
- "src/core/CL/kernels/CLHOGDescriptorKernel.cpp",
- "src/core/CL/kernels/CLHOGDetectorKernel.cpp",
- "src/core/CL/kernels/CLHarrisCornersKernel.cpp",
- "src/core/CL/kernels/CLHistogramKernel.cpp",
"src/core/CL/kernels/CLIm2ColKernel.cpp",
"src/core/CL/kernels/CLInstanceNormalizationLayerKernel.cpp",
- "src/core/CL/kernels/CLIntegralImageKernel.cpp",
"src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp",
"src/core/CL/kernels/CLLKTrackerKernel.cpp",
- "src/core/CL/kernels/CLMagnitudePhaseKernel.cpp",
"src/core/CL/kernels/CLMaxUnpoolingLayerKernel.cpp",
- "src/core/CL/kernels/CLMeanStdDevKernel.cpp",
"src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp",
- "src/core/CL/kernels/CLMedian3x3Kernel.cpp",
"src/core/CL/kernels/CLMinMaxLayerKernel.cpp",
- "src/core/CL/kernels/CLMinMaxLocationKernel.cpp",
- "src/core/CL/kernels/CLNonLinearFilterKernel.cpp",
- "src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp",
"src/core/CL/kernels/CLNormalizationLayerKernel.cpp",
"src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp",
"src/core/CL/kernels/CLPadLayerKernel.cpp",
@@ -171,22 +145,14 @@ cc_library_static {
"src/core/CL/kernels/CLReorgLayerKernel.cpp",
"src/core/CL/kernels/CLReverseKernel.cpp",
"src/core/CL/kernels/CLScaleKernel.cpp",
- "src/core/CL/kernels/CLScharr3x3Kernel.cpp",
"src/core/CL/kernels/CLSelectKernel.cpp",
- "src/core/CL/kernels/CLSobel3x3Kernel.cpp",
- "src/core/CL/kernels/CLSobel5x5Kernel.cpp",
- "src/core/CL/kernels/CLSobel7x7Kernel.cpp",
"src/core/CL/kernels/CLSoftmaxLayerKernel.cpp",
"src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp",
"src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp",
"src/core/CL/kernels/CLStackLayerKernel.cpp",
"src/core/CL/kernels/CLStridedSliceKernel.cpp",
- "src/core/CL/kernels/CLTableLookupKernel.cpp",
- "src/core/CL/kernels/CLThresholdKernel.cpp",
"src/core/CL/kernels/CLTileKernel.cpp",
"src/core/CL/kernels/CLTransposeKernel.cpp",
- "src/core/CL/kernels/CLWarpAffineKernel.cpp",
- "src/core/CL/kernels/CLWarpPerspectiveKernel.cpp",
"src/core/CL/kernels/CLWeightsReshapeKernel.cpp",
"src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp",
"src/core/CL/kernels/CLWinogradInputTransformKernel.cpp",
@@ -223,7 +189,6 @@ cc_library_static {
"src/core/NEON/kernels/NECol2ImKernel.cpp",
"src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.cpp",
"src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.cpp",
- "src/core/NEON/kernels/NEConvolutionKernel.cpp",
"src/core/NEON/kernels/NECropKernel.cpp",
"src/core/NEON/kernels/NECumulativeDistributionKernel.cpp",
"src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp",
@@ -260,7 +225,6 @@ cc_library_static {
"src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.cpp",
"src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp",
"src/core/NEON/kernels/NEMinMaxLayerKernel.cpp",
- "src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp",
"src/core/NEON/kernels/NENormalizationLayerKernel.cpp",
"src/core/NEON/kernels/NEPadLayerKernel.cpp",
"src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp",
@@ -271,6 +235,7 @@ cc_library_static {
"src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp",
"src/core/NEON/kernels/NERangeKernel.cpp",
"src/core/NEON/kernels/NEReductionOperationKernel.cpp",
+ "src/core/NEON/kernels/NERemapKernel.cpp",
"src/core/NEON/kernels/NEReorgLayerKernel.cpp",
"src/core/NEON/kernels/NEReverseKernel.cpp",
"src/core/NEON/kernels/NEScaleKernel.cpp",
@@ -463,8 +428,6 @@ cc_library_static {
"src/runtime/CL/CLTuner.cpp",
"src/runtime/CL/ICLSimpleFunction.cpp",
"src/runtime/CL/Utils.cpp",
- "src/runtime/CL/functions/CLAbsoluteDifference.cpp",
- "src/runtime/CL/functions/CLAccumulate.cpp",
"src/runtime/CL/functions/CLActivationLayer.cpp",
"src/runtime/CL/functions/CLArgMinMaxLayer.cpp",
"src/runtime/CL/functions/CLBatchNormalizationLayer.cpp",
@@ -474,17 +437,11 @@ cc_library_static {
"src/runtime/CL/functions/CLBitwiseOr.cpp",
"src/runtime/CL/functions/CLBitwiseXor.cpp",
"src/runtime/CL/functions/CLBoundingBoxTransform.cpp",
- "src/runtime/CL/functions/CLBox3x3.cpp",
- "src/runtime/CL/functions/CLCannyEdge.cpp",
"src/runtime/CL/functions/CLCast.cpp",
- "src/runtime/CL/functions/CLChannelCombine.cpp",
- "src/runtime/CL/functions/CLChannelExtract.cpp",
"src/runtime/CL/functions/CLChannelShuffleLayer.cpp",
- "src/runtime/CL/functions/CLColorConvert.cpp",
"src/runtime/CL/functions/CLComparison.cpp",
"src/runtime/CL/functions/CLConcatenateLayer.cpp",
"src/runtime/CL/functions/CLConvertFullyConnectedWeights.cpp",
- "src/runtime/CL/functions/CLConvolution.cpp",
"src/runtime/CL/functions/CLConvolutionLayer.cpp",
"src/runtime/CL/functions/CLCopy.cpp",
"src/runtime/CL/functions/CLCrop.cpp",
@@ -495,18 +452,13 @@ cc_library_static {
"src/runtime/CL/functions/CLDepthToSpaceLayer.cpp",
"src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp",
"src/runtime/CL/functions/CLDequantizationLayer.cpp",
- "src/runtime/CL/functions/CLDerivative.cpp",
- "src/runtime/CL/functions/CLDilate.cpp",
"src/runtime/CL/functions/CLDirectConvolutionLayer.cpp",
"src/runtime/CL/functions/CLDirectDeconvolutionLayer.cpp",
"src/runtime/CL/functions/CLElementWiseUnaryLayer.cpp",
"src/runtime/CL/functions/CLElementwiseOperations.cpp",
- "src/runtime/CL/functions/CLEqualizeHistogram.cpp",
- "src/runtime/CL/functions/CLErode.cpp",
"src/runtime/CL/functions/CLFFT1D.cpp",
"src/runtime/CL/functions/CLFFT2D.cpp",
"src/runtime/CL/functions/CLFFTConvolutionLayer.cpp",
- "src/runtime/CL/functions/CLFastCorners.cpp",
"src/runtime/CL/functions/CLFill.cpp",
"src/runtime/CL/functions/CLFillBorder.cpp",
"src/runtime/CL/functions/CLFlattenLayer.cpp",
@@ -519,41 +471,21 @@ cc_library_static {
"src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp",
"src/runtime/CL/functions/CLGEMMLowpOutputStage.cpp",
"src/runtime/CL/functions/CLGather.cpp",
- "src/runtime/CL/functions/CLGaussian3x3.cpp",
- "src/runtime/CL/functions/CLGaussian5x5.cpp",
- "src/runtime/CL/functions/CLGaussianPyramid.cpp",
"src/runtime/CL/functions/CLGenerateProposalsLayer.cpp",
- "src/runtime/CL/functions/CLHOGDescriptor.cpp",
- "src/runtime/CL/functions/CLHOGDetector.cpp",
- "src/runtime/CL/functions/CLHOGGradient.cpp",
- "src/runtime/CL/functions/CLHOGMultiDetection.cpp",
- "src/runtime/CL/functions/CLHarrisCorners.cpp",
- "src/runtime/CL/functions/CLHistogram.cpp",
"src/runtime/CL/functions/CLInstanceNormalizationLayer.cpp",
- "src/runtime/CL/functions/CLIntegralImage.cpp",
"src/runtime/CL/functions/CLL2NormalizeLayer.cpp",
"src/runtime/CL/functions/CLLSTMLayer.cpp",
"src/runtime/CL/functions/CLLSTMLayerQuantized.cpp",
- "src/runtime/CL/functions/CLLaplacianPyramid.cpp",
- "src/runtime/CL/functions/CLLaplacianReconstruct.cpp",
"src/runtime/CL/functions/CLLogicalAnd.cpp",
"src/runtime/CL/functions/CLLogicalNot.cpp",
"src/runtime/CL/functions/CLLogicalOr.cpp",
- "src/runtime/CL/functions/CLMagnitude.cpp",
"src/runtime/CL/functions/CLMaxUnpoolingLayer.cpp",
- "src/runtime/CL/functions/CLMeanStdDev.cpp",
"src/runtime/CL/functions/CLMeanStdDevNormalizationLayer.cpp",
- "src/runtime/CL/functions/CLMedian3x3.cpp",
- "src/runtime/CL/functions/CLMinMaxLocation.cpp",
- "src/runtime/CL/functions/CLNonLinearFilter.cpp",
- "src/runtime/CL/functions/CLNonMaximaSuppression3x3.cpp",
"src/runtime/CL/functions/CLNormalizationLayer.cpp",
"src/runtime/CL/functions/CLNormalizePlanarYUVLayer.cpp",
- "src/runtime/CL/functions/CLOpticalFlow.cpp",
"src/runtime/CL/functions/CLPReluLayer.cpp",
"src/runtime/CL/functions/CLPadLayer.cpp",
"src/runtime/CL/functions/CLPermute.cpp",
- "src/runtime/CL/functions/CLPhase.cpp",
"src/runtime/CL/functions/CLPixelWiseMultiplication.cpp",
"src/runtime/CL/functions/CLPoolingLayer.cpp",
"src/runtime/CL/functions/CLPriorBoxLayer.cpp",
@@ -570,25 +502,17 @@ cc_library_static {
"src/runtime/CL/functions/CLReshapeLayer.cpp",
"src/runtime/CL/functions/CLReverse.cpp",
"src/runtime/CL/functions/CLScale.cpp",
- "src/runtime/CL/functions/CLScharr3x3.cpp",
"src/runtime/CL/functions/CLSelect.cpp",
"src/runtime/CL/functions/CLSlice.cpp",
- "src/runtime/CL/functions/CLSobel3x3.cpp",
- "src/runtime/CL/functions/CLSobel5x5.cpp",
- "src/runtime/CL/functions/CLSobel7x7.cpp",
"src/runtime/CL/functions/CLSoftmaxLayer.cpp",
"src/runtime/CL/functions/CLSpaceToBatchLayer.cpp",
"src/runtime/CL/functions/CLSpaceToDepthLayer.cpp",
"src/runtime/CL/functions/CLSplit.cpp",
"src/runtime/CL/functions/CLStackLayer.cpp",
"src/runtime/CL/functions/CLStridedSlice.cpp",
- "src/runtime/CL/functions/CLTableLookup.cpp",
- "src/runtime/CL/functions/CLThreshold.cpp",
"src/runtime/CL/functions/CLTile.cpp",
"src/runtime/CL/functions/CLTranspose.cpp",
"src/runtime/CL/functions/CLUnstack.cpp",
- "src/runtime/CL/functions/CLWarpAffine.cpp",
- "src/runtime/CL/functions/CLWarpPerspective.cpp",
"src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp",
"src/runtime/CL/functions/CLWinogradInputTransform.cpp",
"src/runtime/CL/gemm/CLGEMMDefaultTypeBifrost.cpp",
@@ -646,7 +570,6 @@ cc_library_static {
"src/runtime/NEON/functions/NEChannelShuffleLayer.cpp",
"src/runtime/NEON/functions/NEConcatenateLayer.cpp",
"src/runtime/NEON/functions/NEConvertFullyConnectedWeights.cpp",
- "src/runtime/NEON/functions/NEConvolution.cpp",
"src/runtime/NEON/functions/NEConvolutionLayer.cpp",
"src/runtime/NEON/functions/NECopy.cpp",
"src/runtime/NEON/functions/NECropResize.cpp",
@@ -683,7 +606,6 @@ cc_library_static {
"src/runtime/NEON/functions/NELogical.cpp",
"src/runtime/NEON/functions/NEMaxUnpoolingLayer.cpp",
"src/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.cpp",
- "src/runtime/NEON/functions/NENonMaximaSuppression3x3.cpp",
"src/runtime/NEON/functions/NENormalizationLayer.cpp",
"src/runtime/NEON/functions/NEPReluLayer.cpp",
"src/runtime/NEON/functions/NEPadLayer.cpp",
@@ -699,6 +621,7 @@ cc_library_static {
"src/runtime/NEON/functions/NERange.cpp",
"src/runtime/NEON/functions/NEReduceMean.cpp",
"src/runtime/NEON/functions/NEReductionOperation.cpp",
+ "src/runtime/NEON/functions/NERemap.cpp",
"src/runtime/NEON/functions/NEReorgLayer.cpp",
"src/runtime/NEON/functions/NEReshapeLayer.cpp",
"src/runtime/NEON/functions/NEReverse.cpp",
diff --git a/arm_compute/runtime/CL/CLFunctions.h b/arm_compute/runtime/CL/CLFunctions.h
index b2bdb9a3e7..01b61c82d8 100644
--- a/arm_compute/runtime/CL/CLFunctions.h
+++ b/arm_compute/runtime/CL/CLFunctions.h
@@ -25,8 +25,6 @@
#define ARM_COMPUTE_CLFUNCTIONS_H
/* Header regrouping all the CL functions */
-#include "arm_compute/runtime/CL/functions/CLAbsoluteDifference.h"
-#include "arm_compute/runtime/CL/functions/CLAccumulate.h"
#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
#include "arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h"
#include "arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h"
@@ -36,17 +34,11 @@
#include "arm_compute/runtime/CL/functions/CLBitwiseOr.h"
#include "arm_compute/runtime/CL/functions/CLBitwiseXor.h"
#include "arm_compute/runtime/CL/functions/CLBoundingBoxTransform.h"
-#include "arm_compute/runtime/CL/functions/CLBox3x3.h"
-#include "arm_compute/runtime/CL/functions/CLCannyEdge.h"
#include "arm_compute/runtime/CL/functions/CLCast.h"
-#include "arm_compute/runtime/CL/functions/CLChannelCombine.h"
-#include "arm_compute/runtime/CL/functions/CLChannelExtract.h"
#include "arm_compute/runtime/CL/functions/CLChannelShuffleLayer.h"
-#include "arm_compute/runtime/CL/functions/CLColorConvert.h"
#include "arm_compute/runtime/CL/functions/CLComparison.h"
#include "arm_compute/runtime/CL/functions/CLConcatenateLayer.h"
#include "arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h"
-#include "arm_compute/runtime/CL/functions/CLConvolution.h"
#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"
#include "arm_compute/runtime/CL/functions/CLCopy.h"
#include "arm_compute/runtime/CL/functions/CLCrop.h"
@@ -57,18 +49,13 @@
#include "arm_compute/runtime/CL/functions/CLDepthToSpaceLayer.h"
#include "arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h"
#include "arm_compute/runtime/CL/functions/CLDequantizationLayer.h"
-#include "arm_compute/runtime/CL/functions/CLDerivative.h"
-#include "arm_compute/runtime/CL/functions/CLDilate.h"
#include "arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h"
#include "arm_compute/runtime/CL/functions/CLDirectDeconvolutionLayer.h"
#include "arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h"
#include "arm_compute/runtime/CL/functions/CLElementwiseOperations.h"
-#include "arm_compute/runtime/CL/functions/CLEqualizeHistogram.h"
-#include "arm_compute/runtime/CL/functions/CLErode.h"
#include "arm_compute/runtime/CL/functions/CLFFT1D.h"
#include "arm_compute/runtime/CL/functions/CLFFT2D.h"
#include "arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h"
-#include "arm_compute/runtime/CL/functions/CLFastCorners.h"
#include "arm_compute/runtime/CL/functions/CLFill.h"
#include "arm_compute/runtime/CL/functions/CLFillBorder.h"
#include "arm_compute/runtime/CL/functions/CLFlattenLayer.h"
@@ -81,41 +68,21 @@
#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h"
#include "arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h"
#include "arm_compute/runtime/CL/functions/CLGather.h"
-#include "arm_compute/runtime/CL/functions/CLGaussian3x3.h"
-#include "arm_compute/runtime/CL/functions/CLGaussian5x5.h"
-#include "arm_compute/runtime/CL/functions/CLGaussianPyramid.h"
#include "arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h"
-#include "arm_compute/runtime/CL/functions/CLHOGDescriptor.h"
-#include "arm_compute/runtime/CL/functions/CLHOGDetector.h"
-#include "arm_compute/runtime/CL/functions/CLHOGGradient.h"
-#include "arm_compute/runtime/CL/functions/CLHOGMultiDetection.h"
-#include "arm_compute/runtime/CL/functions/CLHarrisCorners.h"
-#include "arm_compute/runtime/CL/functions/CLHistogram.h"
#include "arm_compute/runtime/CL/functions/CLInstanceNormalizationLayer.h"
-#include "arm_compute/runtime/CL/functions/CLIntegralImage.h"
#include "arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h"
#include "arm_compute/runtime/CL/functions/CLLSTMLayer.h"
#include "arm_compute/runtime/CL/functions/CLLSTMLayerQuantized.h"
-#include "arm_compute/runtime/CL/functions/CLLaplacianPyramid.h"
-#include "arm_compute/runtime/CL/functions/CLLaplacianReconstruct.h"
#include "arm_compute/runtime/CL/functions/CLLogicalAnd.h"
#include "arm_compute/runtime/CL/functions/CLLogicalNot.h"
#include "arm_compute/runtime/CL/functions/CLLogicalOr.h"
-#include "arm_compute/runtime/CL/functions/CLMagnitude.h"
#include "arm_compute/runtime/CL/functions/CLMaxUnpoolingLayer.h"
-#include "arm_compute/runtime/CL/functions/CLMeanStdDev.h"
#include "arm_compute/runtime/CL/functions/CLMeanStdDevNormalizationLayer.h"
-#include "arm_compute/runtime/CL/functions/CLMedian3x3.h"
-#include "arm_compute/runtime/CL/functions/CLMinMaxLocation.h"
-#include "arm_compute/runtime/CL/functions/CLNonLinearFilter.h"
-#include "arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h"
#include "arm_compute/runtime/CL/functions/CLNormalizationLayer.h"
#include "arm_compute/runtime/CL/functions/CLNormalizePlanarYUVLayer.h"
-#include "arm_compute/runtime/CL/functions/CLOpticalFlow.h"
#include "arm_compute/runtime/CL/functions/CLPReluLayer.h"
#include "arm_compute/runtime/CL/functions/CLPadLayer.h"
#include "arm_compute/runtime/CL/functions/CLPermute.h"
-#include "arm_compute/runtime/CL/functions/CLPhase.h"
#include "arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h"
#include "arm_compute/runtime/CL/functions/CLPoolingLayer.h"
#include "arm_compute/runtime/CL/functions/CLPriorBoxLayer.h"
@@ -132,25 +99,17 @@
#include "arm_compute/runtime/CL/functions/CLReshapeLayer.h"
#include "arm_compute/runtime/CL/functions/CLReverse.h"
#include "arm_compute/runtime/CL/functions/CLScale.h"
-#include "arm_compute/runtime/CL/functions/CLScharr3x3.h"
#include "arm_compute/runtime/CL/functions/CLSelect.h"
#include "arm_compute/runtime/CL/functions/CLSlice.h"
-#include "arm_compute/runtime/CL/functions/CLSobel3x3.h"
-#include "arm_compute/runtime/CL/functions/CLSobel5x5.h"
-#include "arm_compute/runtime/CL/functions/CLSobel7x7.h"
#include "arm_compute/runtime/CL/functions/CLSoftmaxLayer.h"
#include "arm_compute/runtime/CL/functions/CLSpaceToBatchLayer.h"
#include "arm_compute/runtime/CL/functions/CLSpaceToDepthLayer.h"
#include "arm_compute/runtime/CL/functions/CLSplit.h"
#include "arm_compute/runtime/CL/functions/CLStackLayer.h"
#include "arm_compute/runtime/CL/functions/CLStridedSlice.h"
-#include "arm_compute/runtime/CL/functions/CLTableLookup.h"
-#include "arm_compute/runtime/CL/functions/CLThreshold.h"
#include "arm_compute/runtime/CL/functions/CLTile.h"
#include "arm_compute/runtime/CL/functions/CLTranspose.h"
#include "arm_compute/runtime/CL/functions/CLUnstack.h"
-#include "arm_compute/runtime/CL/functions/CLWarpAffine.h"
-#include "arm_compute/runtime/CL/functions/CLWarpPerspective.h"
#include "arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h"
#include "arm_compute/runtime/CL/functions/CLWinogradInputTransform.h"
diff --git a/arm_compute/runtime/CL/functions/CLAbsoluteDifference.h b/arm_compute/runtime/CL/functions/CLAbsoluteDifference.h
deleted file mode 100644
index 86c8022b4f..0000000000
--- a/arm_compute/runtime/CL/functions/CLAbsoluteDifference.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLABSOLUTEDIFFERENCE_H
-#define ARM_COMPUTE_CLABSOLUTEDIFFERENCE_H
-
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class CLCompileContext;
-class ICLTensor;
-
-/** Basic function to run @ref CLAbsoluteDifferenceKernel
- *
- * @note The tensor data types for the inputs must be U8 or S16.
- * @note The function calculates the absolute difference also when the 2 inputs have different tensor data types.
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class CLAbsoluteDifference : public ICLSimpleFunction
-{
-public:
- /** Initialize the function
- *
- * @param[in] input1 First input tensor. Data types supported: U8, S16
- * @param[in] input2 Second input tensor. Data types supported: U8, S16
- * @param[out] output Output tensor. Data types supported: U8, S16
- */
- void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
- /** Initialize the function
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input1 First input tensor. Data types supported: U8, S16
- * @param[in] input2 Second input tensor. Data types supported: U8, S16
- * @param[out] output Output tensor. Data types supported: U8, S16
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
-};
-}
-#endif /* ARM_COMPUTE_CLABSOLUTEDIFFERENCE_H */
diff --git a/arm_compute/runtime/CL/functions/CLAccumulate.h b/arm_compute/runtime/CL/functions/CLAccumulate.h
deleted file mode 100644
index f78ce0e149..0000000000
--- a/arm_compute/runtime/CL/functions/CLAccumulate.h
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLACCUMULATE_H
-#define ARM_COMPUTE_CLACCUMULATE_H
-
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class CLCompileContext;
-class ICLTensor;
-
-/** Basic function to run @ref CLAccumulateKernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
-*/
-class CLAccumulate : public ICLSimpleFunction
-{
-public:
- /** Set the input and accumulation tensors.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] accum Destination tensor. Data types supported: S16.
- */
- void configure(const ICLTensor *input, ICLTensor *accum);
- /** Set the input and accumulation tensors.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] accum Destination tensor. Data types supported: S16.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *accum);
-};
-
-/** Basic function to run @ref CLAccumulateWeightedKernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
-*/
-class CLAccumulateWeighted : public ICLSimpleFunction
-{
-public:
- /** Set the input and accumulation tensors, and the scale value.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[in] alpha The input scalar value with a value input the range of [0, 1.0]. Data types supported: F32.
- * @param[in,out] accum Accumulated tensor. Data types supported: U8.
- */
- void configure(const ICLTensor *input, float alpha, ICLTensor *accum);
- /** Set the input and accumulation tensors, and the scale value.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[in] alpha The input scalar value with a value input the range of [0, 1.0]. Data types supported: F32.
- * @param[in,out] accum Accumulated tensor. Data types supported: U8.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, float alpha, ICLTensor *accum);
-};
-
-/** Basic function to run @ref CLAccumulateSquaredKernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
-*/
-class CLAccumulateSquared : public ICLSimpleFunction
-{
-public:
- /** Set the input and accumulation tensors and the shift value.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[in] shift The input with a value input the range of [0, 15]. Data types supported: U32.
- * @param[in,out] accum Accumulated tensor. Data types supported: S16.
- */
- void configure(const ICLTensor *input, uint32_t shift, ICLTensor *accum);
- /** Set the input and accumulation tensors and the shift value.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[in] shift The input with a value input the range of [0, 15]. Data types supported: U32.
- * @param[in,out] accum Accumulated tensor. Data types supported: S16.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, uint32_t shift, ICLTensor *accum);
-};
-}
-#endif /*ARM_COMPUTE_CLACCUMULATE_H */
diff --git a/arm_compute/runtime/CL/functions/CLBox3x3.h b/arm_compute/runtime/CL/functions/CLBox3x3.h
deleted file mode 100644
index 63c5d3f897..0000000000
--- a/arm_compute/runtime/CL/functions/CLBox3x3.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLBOX3X3_H
-#define ARM_COMPUTE_CLBOX3X3_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class CLCompileContext;
-class ICLTensor;
-
-/** Basic function to execute box filter 3x3. This function calls the following OpenCL kernels:
- *
- * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref CLBox3x3Kernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class CLBox3x3 : public ICLSimpleFunction
-{
-public:
- /** Initialise the function's source, destinations and border mode.
- *
- * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination tensor, Data types supported: U8.
- * @param[in] border_mode Border mode to use for the convolution.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
- /** Initialise the function's source, destinations and border mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination tensor, Data types supported: U8.
- * @param[in] border_mode Border mode to use for the convolution.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
-};
-}
-#endif /*ARM_COMPUTE_CLBOX3X3_H */
diff --git a/arm_compute/runtime/CL/functions/CLCannyEdge.h b/arm_compute/runtime/CL/functions/CLCannyEdge.h
deleted file mode 100644
index 1c48d690a5..0000000000
--- a/arm_compute/runtime/CL/functions/CLCannyEdge.h
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLCANNYEDGE_H
-#define ARM_COMPUTE_CLCANNYEDGE_H
-
-#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-
-#include <memory>
-
-namespace arm_compute
-{
-class CLCompileContext;
-class CLFillBorderKernel;
-class CLGradientKernel;
-class CLEdgeNonMaxSuppressionKernel;
-class CLEdgeTraceKernel;
-class ICLTensor;
-
-/** Basic function to execute canny edge on OpenCL. This function calls the following OpenCL kernels and functions:
- *
- * -# @ref CLFillBorderKernel (if border_mode == REPLICATE or border_mode == CONSTANT)
- * -# @ref CLSobel3x3 (if gradient_size == 3) or @ref CLSobel5x5 (if gradient_size == 5) or @ref CLSobel7x7 (if gradient_size == 7)
- * -# @ref CLGradientKernel
- * -# @ref CLEdgeNonMaxSuppressionKernel
- * -# @ref CLEdgeTraceKernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class CLCannyEdge : public IFunction
-{
-public:
- /** Constructor */
- CLCannyEdge(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLCannyEdge(const CLCannyEdge &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLCannyEdge &operator=(const CLCannyEdge &) = delete;
- /** Default destructor */
- ~CLCannyEdge();
- /** Initialise the function's source, destination, thresholds, gradient size, normalization type and border mode.
- *
- * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for border_mode != UNDEFINED)
- * @param[out] output Destination tensor. Data types supported: U8.
- * @param[in] upper_thr Upper threshold used for the hysteresis.
- * @param[in] lower_thr Lower threshold used for the hysteresis.
- * @param[in] gradient_size Gradient size (3, 5 or 7).
- * @param[in] norm_type Normalization type. if 1, L1-Norm otherwise L2-Norm.
- * @param[in] border_mode Border mode to use for the convolution.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr, int32_t gradient_size, int32_t norm_type, BorderMode border_mode,
- uint8_t constant_border_value = 0);
- /** Initialise the function's source, destination, thresholds, gradient size, normalization type and border mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for border_mode != UNDEFINED)
- * @param[out] output Destination tensor. Data types supported: U8.
- * @param[in] upper_thr Upper threshold used for the hysteresis.
- * @param[in] lower_thr Lower threshold used for the hysteresis.
- * @param[in] gradient_size Gradient size (3, 5 or 7).
- * @param[in] norm_type Normalization type. if 1, L1-Norm otherwise L2-Norm.
- * @param[in] border_mode Border mode to use for the convolution.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr, int32_t gradient_size, int32_t norm_type, BorderMode border_mode,
- uint8_t constant_border_value = 0);
-
- // Inherited methods overridden:
- virtual void run() override;
-
-private:
- MemoryGroup _memory_group; /**< Function's memory group */
- std::unique_ptr<IFunction> _sobel; /**< Pointer to Sobel kernel. */
- std::unique_ptr<CLGradientKernel> _gradient; /**< Gradient kernel. */
- std::unique_ptr<CLFillBorderKernel> _border_mag_gradient; /**< Fill border on magnitude tensor kernel */
- std::unique_ptr<CLEdgeNonMaxSuppressionKernel> _non_max_suppr; /**< Non-Maxima suppression kernel. */
- std::unique_ptr<CLEdgeTraceKernel> _edge_trace; /**< Edge tracing kernel. */
- CLImage _gx; /**< Source tensor - Gx component. */
- CLImage _gy; /**< Source tensor - Gy component. */
- CLImage _mag; /**< Source tensor - Magnitude. */
- CLImage _phase; /**< Source tensor - Phase. */
- CLImage _nonmax; /**< Source tensor - Non-Maxima suppressed. */
- CLImage _visited, _recorded, _l1_list_counter, _l1_stack; /**< Temporary tensors */
- ICLTensor *_output; /**< Output tensor provided by the user. */
-};
-} // namespace arm_compute
-
-#endif /* ARM_COMPUTE_CLCANNYEDGE_H */
diff --git a/arm_compute/runtime/CL/functions/CLChannelCombine.h b/arm_compute/runtime/CL/functions/CLChannelCombine.h
deleted file mode 100644
index 2a36d3f742..0000000000
--- a/arm_compute/runtime/CL/functions/CLChannelCombine.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLCHANNELCOMBINE_H
-#define ARM_COMPUTE_CLCHANNELCOMBINE_H
-
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class CLCompileContext;
-class ICLMultiImage;
-class ICLTensor;
-using ICLImage = ICLTensor;
-
-/** Basic function to run @ref CLChannelCombineKernel to perform channel combination.
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
-*/
-class CLChannelCombine : public ICLSimpleFunction
-{
-public:
- /** Initialize function's inputs and outputs.
- *
- * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format.
- * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format.
- * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format.
- * @param[in] plane3 The 2D plane that forms channel 3. Must be of U8 format.
- * @param[out] output The single planar output tensor.
- */
- void configure(const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output);
- /** Initialize function's inputs and outputs.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format.
- * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format.
- * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format.
- * @param[in] plane3 The 2D plane that forms channel 3. Must be of U8 format.
- * @param[out] output The single planar output tensor.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output);
- /** Initialize function's inputs and outputs.
- *
- * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format.
- * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format.
- * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format.
- * @param[out] output The multi planar output image.
- */
- void configure(const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output);
- /** Initialize function's inputs and outputs.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format.
- * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format.
- * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format.
- * @param[out] output The multi planar output image.
- */
- void configure(const CLCompileContext &compile_context, const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output);
-};
-}
-#endif /*ARM_COMPUTE_CLCHANNELCOMBINE_H*/
diff --git a/arm_compute/runtime/CL/functions/CLChannelExtract.h b/arm_compute/runtime/CL/functions/CLChannelExtract.h
deleted file mode 100644
index 6cd24648ba..0000000000
--- a/arm_compute/runtime/CL/functions/CLChannelExtract.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLCHANNELEXTRACT_H
-#define ARM_COMPUTE_CLCHANNELEXTRACT_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class CLCompileContext;
-class ICLMultiImage;
-class ICLTensor;
-using ICLImage = ICLTensor;
-
-/** Basic function to run @ref CLChannelExtractKernel to perform channel extraction.
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
-*/
-class CLChannelExtract : public ICLSimpleFunction
-{
-public:
- /** Initialize the function's source, destination
- *
- * @param[in] input The input tensor to extract the channel from. Formats supported: RGB888/RGBA8888/YUYV422/UYVY422
- * @param[in] channel The channel to extract.
- * @param[out] output The extracted channel. Must be of U8 format.
- */
- void configure(const ICLTensor *input, Channel channel, ICLTensor *output);
- /** Initialize the function's source, destination
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input The input tensor to extract the channel from. Formats supported: RGB888/RGBA8888/YUYV422/UYVY422
- * @param[in] channel The channel to extract.
- * @param[out] output The extracted channel. Must be of U8 format.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, Channel channel, ICLTensor *output);
- /** Initialize the function's source, destination
- *
- * @param[in] input The multi-planar input image to extract channel from. Formats supported: NV12/NV21/IYUV/YUV444
- * @param[in] channel The channel to extract.
- * @param[out] output The extracted 2D channel. Must be of U8 format.
- */
- void configure(const ICLMultiImage *input, Channel channel, ICLImage *output);
- /** Initialize the function's source, destination
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input The multi-planar input image to extract channel from. Formats supported: NV12/NV21/IYUV/YUV444
- * @param[in] channel The channel to extract.
- * @param[out] output The extracted 2D channel. Must be of U8 format.
- */
- void configure(const CLCompileContext &compile_context, const ICLMultiImage *input, Channel channel, ICLImage *output);
-};
-}
-#endif /*ARM_COMPUTE_CLCHANNELEXTRACT_H*/
diff --git a/arm_compute/runtime/CL/functions/CLColorConvert.h b/arm_compute/runtime/CL/functions/CLColorConvert.h
deleted file mode 100644
index f30621e911..0000000000
--- a/arm_compute/runtime/CL/functions/CLColorConvert.h
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLCOLORCONVERT_H
-#define ARM_COMPUTE_CLCOLORCONVERT_H
-
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class CLCompileContext;
-class ICLMultiImage;
-class ICLTensor;
-using ICLImage = ICLTensor;
-
-/** Basic function to run @ref CLColorConvertKernel
- *
- * @note The function performs color convert between images.
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class CLColorConvert : public ICLSimpleFunction
-{
-public:
- /** Initialize the function's source, destination
- *
- * @param[in] input Source tensor. Formats supported: RGBA8888/UYVY422/YUYV422/RGB888
- * @param[out] output Destination tensor. Formats supported: RGB888 (if the formats of @p input are RGBA8888/UYVY422/YUYV422),
- * RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888/),
- * U8 (if the formats of @p input is RGB888)
- */
- void configure(const ICLTensor *input, ICLTensor *output);
- /** Initialize the function's source, destination
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Formats supported: RGBA8888/UYVY422/YUYV422/RGB888
- * @param[out] output Destination tensor. Formats supported: RGB888 (if the formats of @p input are RGBA8888/UYVY422/YUYV422),
- * RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888/),
- * U8 (if the formats of @p input is RGB888)
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
- /** Initialize the function's source, destination
- *
- * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV
- * @param[out] output Single-planar destination image. Formats supported: RGB888/RGBA8888
- */
- void configure(const ICLMultiImage *input, ICLImage *output);
- /** Initialize the function's source, destination
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV
- * @param[out] output Single-planar destination image. Formats supported: RGB888/RGBA8888
- */
- void configure(const CLCompileContext &compile_context, const ICLMultiImage *input, ICLImage *output);
- /** Initialize the function's source, destination
- *
- * @param[in] input Single-planar source image. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422
- * @param[out] output Multi-planar destination image. Formats supported: NV12/IYUV/YUV444 (if the formats of @p input are RGB888/RGB8888)
- */
- void configure(const ICLImage *input, ICLMultiImage *output);
- /** Initialize the function's source, destination
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Single-planar source image. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422
- * @param[out] output Multi-planar destination image. Formats supported: NV12/IYUV/YUV444 (if the formats of @p input are RGB888/RGB8888)
- */
- void configure(const CLCompileContext &compile_context, const ICLImage *input, ICLMultiImage *output);
- /** Initialize the function's source, destination
- *
- * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV
- * @param[out] output Multi-planar destination image. Formats supported: YUV444/IYUV (if the formats of @p input are NV12/NV21)/NV12 (if the format of @p input is IYUV)
- */
- void configure(const ICLMultiImage *input, ICLMultiImage *output);
- /** Initialize the function's source, destination
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV
- * @param[out] output Multi-planar destination image. Formats supported: YUV444/IYUV (if the formats of @p input are NV12/NV21)/NV12 (if the format of @p input is IYUV)
- */
- void configure(const CLCompileContext &compile_context, const ICLMultiImage *input, ICLMultiImage *output);
-};
-}
-#endif /* ARM_COMPUTE_CLCOLORCONVERT_H */
diff --git a/arm_compute/runtime/CL/functions/CLConvolution.h b/arm_compute/runtime/CL/functions/CLConvolution.h
deleted file mode 100644
index 44346767f3..0000000000
--- a/arm_compute/runtime/CL/functions/CLConvolution.h
+++ /dev/null
@@ -1,190 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLCONVOLUTION_H
-#define ARM_COMPUTE_CLCONVOLUTION_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-
-#include <cstdint>
-#include <memory>
-
-namespace arm_compute
-{
-template <unsigned int matrix_size>
-class CLConvolutionKernel;
-template <unsigned int matrix_size>
-class CLSeparableConvolutionHorKernel;
-template <unsigned int matrix_size>
-class CLSeparableConvolutionVertKernel;
-class CLFillBorderKernel;
-class ICLTensor;
-
-/** Basic function to execute convolution of size 3x3. This function calls the following OpenCL kernels:
- *
- * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref CLConvolution3x3Kernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class CLConvolution3x3 : public ICLSimpleFunction
-{
-public:
- /** Initialize the function's source, destination, conv and border_mode.
- *
- * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination tensor, Data types supported: U8 or S16.
- * @param[in] conv matrix_size x matrix_size S16 coefficients structured as a row-major 2D array in a linear buffer.
- * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
- * @param[in] border_mode Strategy to use for borders.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0);
- /** Initialize the function's source, destination, conv and border_mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination tensor, Data types supported: U8 or S16.
- * @param[in] conv matrix_size x matrix_size S16 coefficients structured as a row-major 2D array in a linear buffer.
- * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
- * @param[in] border_mode Strategy to use for borders.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0);
-};
-
-/** Basic function to execute square convolution. Currently it supports 5x5, 7x7, 9x9. This function calls the following OpenCL kernels:
- *
- * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref CLConvolutionKernel or<br/>
- * @ref CLSeparableConvolutionHorKernel and @ref CLSeparableConvolutionVertKernel (if convolution matrix is separable)
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-template <unsigned int matrix_size>
-class CLConvolutionSquare : public IFunction
-{
-public:
- /** Default constructor */
- CLConvolutionSquare(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLConvolutionSquare(const CLConvolutionSquare &) = delete;
- /** Default move constructor */
- CLConvolutionSquare(CLConvolutionSquare &&) = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLConvolutionSquare &operator=(const CLConvolutionSquare &) = delete;
- /** Default move assignment operator */
- CLConvolutionSquare &operator=(CLConvolutionSquare &&) = default;
- /** Default destructor */
- ~CLConvolutionSquare();
- /** Initialize the function's source, destination, conv and border_mode.
- *
- * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination tensor, Data types supported: U8 or S16.
- * @param[in] conv matrix_size x matrix_size S16 coefficients structured as a row-major 2D array in a linear buffer.
- * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
- * @param[in] border_mode Strategy to use for borders.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0);
- /** Initialize the function's source, destination, conv and border_mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination tensor, Data types supported: U8 or S16.
- * @param[in] conv matrix_size x matrix_size S16 coefficients structured as a row-major 2D array in a linear buffer.
- * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
- * @param[in] border_mode Strategy to use for borders.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- MemoryGroup _memory_group; /**< Function's memory group */
- CLTensor _tmp; /**< temporary buffer for output of horizontal pass */
- bool _is_separable; /**< true if the convolution can be separated */
- std::unique_ptr<CLSeparableConvolutionHorKernel<matrix_size>> _kernel_hor; /**< kernel for horizontal pass of separated convolution */
- std::unique_ptr<CLSeparableConvolutionVertKernel<matrix_size>> _kernel_vert; /**< kernel for vertical pass of separated convolution */
- std::unique_ptr<CLConvolutionKernel<matrix_size>> _kernel; /**< kernel for non-separated convolution **/
- std::unique_ptr<CLFillBorderKernel> _border_handler; /**< kernel for border handling */
-};
-
-/** Basic function to run 5x5 convolution. */
-using CLConvolution5x5 = CLConvolutionSquare<5>;
-/** Basic function to run 7x7 convolution. */
-using CLConvolution7x7 = CLConvolutionSquare<7>;
-/** Basic function to run 9x9 convolution. */
-using CLConvolution9x9 = CLConvolutionSquare<9>;
-
-/** Basic function to execute non-square convolution. This function calls the following CL kernels:
- *
- * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref CLConvolutionRectangleKernel
- *
- * @note Convolution rectangle should have dimensions of 3, 5, 7, 9
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class CLConvolutionRectangle : public ICLSimpleFunction
-{
-public:
- /** Initialize the function's source, destination, conv and border_mode.
- *
- * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination tensor, Data types supported: U8 or S16.
- * @param[in] conv rows x cols S16 coefficients structured as a row-major 2D array in a linear buffer.
- * @param[in] rows Rows of convolution kernel.
- * @param[in] cols Columns of convolution kernel.
- * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
- * @param[in] border_mode Strategy to use for borders.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t rows, uint32_t cols, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0);
- /** Initialize the function's source, destination, conv and border_mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination tensor, Data types supported: U8 or S16.
- * @param[in] conv rows x cols S16 coefficients structured as a row-major 2D array in a linear buffer.
- * @param[in] rows Rows of convolution kernel.
- * @param[in] cols Columns of convolution kernel.
- * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
- * @param[in] border_mode Strategy to use for borders.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t rows, uint32_t cols, uint32_t scale, BorderMode border_mode,
- uint8_t constant_border_value = 0);
-};
-}
-#endif /*ARM_COMPUTE_CLCONVOLUTION_H */
diff --git a/arm_compute/runtime/CL/functions/CLDerivative.h b/arm_compute/runtime/CL/functions/CLDerivative.h
deleted file mode 100644
index 8918dac0ea..0000000000
--- a/arm_compute/runtime/CL/functions/CLDerivative.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLDERIVATIVE_H
-#define ARM_COMPUTE_CLDERIVATIVE_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class CLCompileContext;
-class ICLTensor;
-
-/** Basic function to execute first order derivative operator. This function calls the following CL kernels:
- *
- * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref CLDerivativeKernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class CLDerivative : public ICLSimpleFunction
-{
-public:
- /** Initialise the function's source, destinations and border mode.
- *
- * @note At least one of output_x or output_y must not be NULL.
- *
- * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output_x (optional) Destination tensor. Derivative along the X direction. Data types supported: S16.
- * @param[out] output_y (optional) Destination tensor. Derivative along the Y direction. Data types supported: S16.
- * @param[in] border_mode Border mode to use
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- *
- */
- void configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
- /** Initialise the function's source, destinations and border mode.
- *
- * @note At least one of output_x or output_y must not be NULL.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output_x (optional) Destination tensor. Derivative along the X direction. Data types supported: S16.
- * @param[out] output_y (optional) Destination tensor. Derivative along the Y direction. Data types supported: S16.
- * @param[in] border_mode Border mode to use
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- *
- */
- void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
-};
-}
-#endif /* ARM_COMPUTE_CLDERIVATIVE_H */
diff --git a/arm_compute/runtime/CL/functions/CLDilate.h b/arm_compute/runtime/CL/functions/CLDilate.h
deleted file mode 100644
index e15621b5a4..0000000000
--- a/arm_compute/runtime/CL/functions/CLDilate.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLDILATE_H
-#define ARM_COMPUTE_CLDILATE_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class CLCompileContext;
-class ICLTensor;
-
-/** Basic function to execute dilate. This function calls the following OpenCL kernels:
-*
-* -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
-* -# @ref CLDilateKernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
-*/
-class CLDilate : public ICLSimpleFunction
-{
-public:
- /** Initialise the kernel's inputs, output and border mode.
- *
- * @param[in,out] input First tensor input. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Output tensor. Data types supported: U8.
- * @param[in] border_mode Border mode to use for the convolution.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
- /** Initialise the kernel's inputs, output and border mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in,out] input First tensor input. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Output tensor. Data types supported: U8.
- * @param[in] border_mode Border mode to use for the convolution.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
-};
-}
-#endif /*ARM_COMPUTE_CLDILATE_H */
diff --git a/arm_compute/runtime/CL/functions/CLEqualizeHistogram.h b/arm_compute/runtime/CL/functions/CLEqualizeHistogram.h
deleted file mode 100644
index 41479e3f22..0000000000
--- a/arm_compute/runtime/CL/functions/CLEqualizeHistogram.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLEQUALIZEHISTOGRAM_H
-#define ARM_COMPUTE_CLEQUALIZEHISTOGRAM_H
-
-#include "arm_compute/runtime/CL/CLDistribution1D.h"
-#include "arm_compute/runtime/CL/CLLut.h"
-#include "arm_compute/runtime/IFunction.h"
-
-#include <cstdint>
-#include <memory>
-
-namespace arm_compute
-{
-class CLCompileContext;
-class CLHistogramKernel;
-class CLHistogramBorderKernel;
-class CLTableLookupKernel;
-class ICLTensor;
-using ICLImage = ICLTensor;
-
-/** Basic function to execute histogram equalization. This function calls the following CL kernels:
- *
- * -# @ref CLHistogramKernel
- * -# @ref CLTableLookupKernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class CLEqualizeHistogram : public IFunction
-{
-public:
- /** Default Constructor. */
- CLEqualizeHistogram();
- /** Prevent instances of this class from being copied */
- CLEqualizeHistogram(const CLEqualizeHistogram &) = delete;
- /** Prevent instances of this class from being copied */
- CLEqualizeHistogram &operator=(const CLEqualizeHistogram &) = delete;
- /** Default destructor */
- ~CLEqualizeHistogram();
- /** Initialise the kernel's inputs.
- *
- * @param[in] input Input image. Data types supported: U8.
- * @param[out] output Output of same data type with equalized brightness and contrast.
- */
- void configure(const ICLImage *input, ICLImage *output);
- /** Initialise the kernel's inputs.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input image. Data types supported: U8.
- * @param[out] output Output of same data type with equalized brightness and contrast.
- */
- void configure(const CLCompileContext &compile_context, const ICLImage *input, ICLImage *output);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- std::unique_ptr<CLHistogramKernel> _histogram_kernel; /**< Kernel that calculates the histogram of input. */
- std::unique_ptr<CLHistogramBorderKernel> _border_histogram_kernel; /**< Kernel that calculates the histogram on the borders. */
- std::unique_ptr<CLTableLookupKernel> _map_histogram_kernel; /**< Kernel that maps the input to output using the lut. */
- CLDistribution1D _hist; /**< Distribution that holds the histogram of the input image. */
- CLDistribution1D _cum_dist; /**< Distribution that holds the cumulative distribution of the input histogram. */
- CLLut _cd_lut; /**< Holds the equalization lookuptable. */
- static const uint32_t max_range = 256; /**< Histogram range of the internal histograms. */
- static const uint32_t nr_bins = 256; /**< Histogram bins of the internal histograms. */
-};
-}
-#endif /*ARM_COMPUTE_CLEQUALIZEHISTOGRAM_H */
diff --git a/arm_compute/runtime/CL/functions/CLErode.h b/arm_compute/runtime/CL/functions/CLErode.h
deleted file mode 100644
index bd66ed983b..0000000000
--- a/arm_compute/runtime/CL/functions/CLErode.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLERODE_H
-#define ARM_COMPUTE_CLERODE_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class CLCompileContext;
-class ICLTensor;
-
-/** Basic function to execute erode. This function calls the following OpenCL kernels:
-*
-* -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
-* -# @ref CLErodeKernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
-*/
-class CLErode : public ICLSimpleFunction
-{
-public:
- /** Initialise the kernel's inputs, output and border mode
- *
- * @param[in,out] input First tensor input. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Output tensor. Data types supported: U8.
- * @param[in] border_mode Border mode to use for the convolution.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
- /** Initialise the kernel's inputs, output and border mode
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in,out] input First tensor input. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Output tensor. Data types supported: U8.
- * @param[in] border_mode Border mode to use for the convolution.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
-};
-}
-#endif /*ARM_COMPUTE_CLERODE_H */
diff --git a/arm_compute/runtime/CL/functions/CLFastCorners.h b/arm_compute/runtime/CL/functions/CLFastCorners.h
deleted file mode 100644
index 608fdf8002..0000000000
--- a/arm_compute/runtime/CL/functions/CLFastCorners.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLFASTCORNERS_H
-#define ARM_COMPUTE_CLFASTCORNERS_H
-
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Window.h"
-#include "arm_compute/runtime/CL/CLArray.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-
-#include <cstdint>
-#include <memory>
-
-namespace arm_compute
-{
-class CLFastCornersKernel;
-class CLCopyToArrayKernel;
-class ICLTensor;
-using ICLImage = ICLTensor;
-
-/** Basic function to execute fast corners. This function calls the following CL kernels:
- *
- * -# @ref CLFastCornersKernel
- * -# @ref CLNonMaximaSuppression3x3Kernel (executed if nonmax_suppression == true)
- * -# @ref CLCopyToArrayKernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class CLFastCorners : public IFunction
-{
-public:
- /** Constructor */
- CLFastCorners(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLFastCorners(const CLFastCorners &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- const CLFastCorners &operator=(const CLFastCorners &) = delete;
- /** Default destructor */
- ~CLFastCorners();
- /** Initialize the function's source, threshold, output corners array and border_mode.
- *
- * @param[in] input Source image. Data types supported: U8.
- * @param[in] threshold Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3.
- * @param[in] nonmax_suppression If true, non-maximum suppression is applied to detected corners before being placed in the array.
- * @param[out] corners Array of keypoints to store the results.
- * @param[in,out] num_corners Record number of corners in the array
- * @param[in] border_mode Strategy to use for borders.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(const ICLImage *input, float threshold, bool nonmax_suppression, ICLKeyPointArray *corners, unsigned int *num_corners,
- BorderMode border_mode, uint8_t constant_border_value = 0);
- /** Initialize the function's source, threshold, output corners array and border_mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source image. Data types supported: U8.
- * @param[in] threshold Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3.
- * @param[in] nonmax_suppression If true, non-maximum suppression is applied to detected corners before being placed in the array.
- * @param[out] corners Array of keypoints to store the results.
- * @param[in,out] num_corners Record number of corners in the array
- * @param[in] border_mode Strategy to use for borders.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(const CLCompileContext &compile_context, const ICLImage *input, float threshold, bool nonmax_suppression, ICLKeyPointArray *corners, unsigned int *num_corners,
- BorderMode border_mode, uint8_t constant_border_value = 0);
- // Inherited methods overridden:
- void run() override;
-
-private:
- MemoryGroup _memory_group;
- std::unique_ptr<CLFastCornersKernel> _fast_corners_kernel;
- CLNonMaximaSuppression3x3 _suppr_func;
- std::unique_ptr<CLCopyToArrayKernel> _copy_array_kernel;
- CLImage _output;
- CLImage _suppr;
- Window _win;
- bool _non_max;
- unsigned int *_num_corners;
- cl::Buffer _num_buffer;
- ICLKeyPointArray *_corners;
- uint8_t _constant_border_value;
-};
-}
-#endif /*ARM_COMPUTE_CLFASTCORNERS_H */
diff --git a/arm_compute/runtime/CL/functions/CLGaussian3x3.h b/arm_compute/runtime/CL/functions/CLGaussian3x3.h
deleted file mode 100644
index 20ce2b4bea..0000000000
--- a/arm_compute/runtime/CL/functions/CLGaussian3x3.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGAUSSIAN3X3_H
-#define ARM_COMPUTE_CLGAUSSIAN3X3_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class CLCompileContext;
-class ICLTensor;
-
-/** Basic function to execute gaussian filter 3x3. This function calls the following OpenCL kernels:
- *
- * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref CLGaussian3x3Kernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class CLGaussian3x3 : public ICLSimpleFunction
-{
-public:
- /** Initialise the function's source, destinations and border mode.
- *
- * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination tensor, Data types supported: U8.
- * @param[in] border_mode Border mode to use for the convolution.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
- /** Initialise the function's source, destinations and border mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination tensor, Data types supported: U8.
- * @param[in] border_mode Border mode to use for the convolution.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
-};
-}
-#endif /*ARM_COMPUTE_CLGAUSSIAN3X3_H */
diff --git a/arm_compute/runtime/CL/functions/CLGaussian5x5.h b/arm_compute/runtime/CL/functions/CLGaussian5x5.h
deleted file mode 100644
index d08cef21c3..0000000000
--- a/arm_compute/runtime/CL/functions/CLGaussian5x5.h
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGAUSSIAN5X5_H
-#define ARM_COMPUTE_CLGAUSSIAN5X5_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-
-#include <cstdint>
-#include <memory>
-
-namespace arm_compute
-{
-class CLCompileContext;
-class CLFillBorderKernel;
-class CLGaussian5x5HorKernel;
-class CLGaussian5x5VertKernel;
-class ICLTensor;
-
-/** Basic function to execute gaussian filter 5x5. This function calls the following OpenCL kernels:
- *
- * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref CLGaussian5x5HorKernel
- * -# @ref CLGaussian5x5VertKernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class CLGaussian5x5 : public IFunction
-{
-public:
- /** Default Constructor.
- *
- * @param[in] memory_manager (Optional) Memory manager.
- */
- CLGaussian5x5(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Prevent instances of this class from being copied */
- CLGaussian5x5(const CLGaussian5x5 &) = delete;
- /** Default move constructor */
- CLGaussian5x5(CLGaussian5x5 &&) = default;
- /** Prevent instances of this class from being copied */
- CLGaussian5x5 &operator=(const CLGaussian5x5 &) = delete;
- /** Default move assignment operator */
- CLGaussian5x5 &operator=(CLGaussian5x5 &&) = default;
- /** Default destructor */
- ~CLGaussian5x5();
- /** Initialise the function's source, destinations and border mode.
- *
- * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination tensor, Data types supported: U8.
- * @param[in] border_mode Border mode to use for the convolution.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
- /** Initialise the function's source, destinations and border mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination tensor, Data types supported: U8.
- * @param[in] border_mode Border mode to use for the convolution.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
-
- // Inherited methods overridden:
- void run() override;
-
-protected:
- MemoryGroup _memory_group; /**< Function's memory group */
- std::unique_ptr<CLGaussian5x5HorKernel> _kernel_hor; /**< Horizontal pass kernel */
- std::unique_ptr<CLGaussian5x5VertKernel> _kernel_vert; /**< Vertical pass kernel */
- std::unique_ptr<CLFillBorderKernel> _border_handler; /**< Kernel to handle image borders */
- CLImage _tmp; /**< Temporary buffer */
-};
-}
-#endif /*ARM_COMPUTE_CLGAUSSIAN5X5_H */
diff --git a/arm_compute/runtime/CL/functions/CLGaussianPyramid.h b/arm_compute/runtime/CL/functions/CLGaussianPyramid.h
deleted file mode 100644
index 70be6738a6..0000000000
--- a/arm_compute/runtime/CL/functions/CLGaussianPyramid.h
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGAUSSIANPYRAMID_H
-#define ARM_COMPUTE_CLGAUSSIANPYRAMID_H
-
-#include "arm_compute/core/IPyramid.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLPyramid.h"
-#include "arm_compute/runtime/CL/functions/CLGaussian5x5.h"
-#include "arm_compute/runtime/IFunction.h"
-
-#include <cstdint>
-#include <memory>
-
-namespace arm_compute
-{
-class CLCompileContext;
-class CLFillBorderKernel;
-class ICLTensor;
-class CLGaussianPyramidHorKernel;
-class CLGaussianPyramidVertKernel;
-class CLScaleKernel;
-
-/** Common interface for all Gaussian pyramid functions
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
-*/
-class CLGaussianPyramid : public IFunction
-{
-public:
- /** Constructor */
- CLGaussianPyramid();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGaussianPyramid(const CLGaussianPyramid &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGaussianPyramid &operator=(const CLGaussianPyramid &) = delete;
- /** Allow instances of this class to be moved */
- CLGaussianPyramid(CLGaussianPyramid &&) = default;
- /** Allow instances of this class to be moved */
- CLGaussianPyramid &operator=(CLGaussianPyramid &&) = default;
- /** Default destructor */
- ~CLGaussianPyramid();
- /** Initialise the function's source, destinations and border mode.
- *
- * @param[in, out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] pyramid Destination pyramid tensors, Data types supported at each level: U8.
- * @param[in] border_mode Border mode to use.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- *
- */
- virtual void configure(ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value = 0) = 0;
- /** Initialise the function's source, destinations and border mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in, out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] pyramid Destination pyramid tensors, Data types supported at each level: U8.
- * @param[in] border_mode Border mode to use.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- *
- */
- virtual void configure(const CLCompileContext &compile_context, ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value = 0) = 0;
-
-protected:
- ICLTensor *_input;
- CLPyramid *_pyramid;
- CLPyramid _tmp;
-};
-
-/** Basic function to execute gaussian pyramid with HALF scale factor. This function calls the following OpenCL kernels:
- *
- * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref CLGaussianPyramidHorKernel
- * -# @ref CLGaussianPyramidVertKernel
- */
-class CLGaussianPyramidHalf : public CLGaussianPyramid
-{
-public:
- /** Constructor */
- CLGaussianPyramidHalf();
- /** Prevent instances of this class from being copied */
- CLGaussianPyramidHalf(const CLGaussianPyramidHalf &) = delete;
- /** Prevent instances of this class from being copied */
- CLGaussianPyramidHalf &operator=(const CLGaussianPyramidHalf &) = delete;
- /** Default destructor */
- ~CLGaussianPyramidHalf();
-
- // Inherited methods overridden:
- void configure(ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override;
- void configure(const CLCompileContext &compile_context, ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override;
- void run() override;
-
-private:
- std::vector<std::unique_ptr<CLFillBorderKernel>> _horizontal_border_handler;
- std::vector<std::unique_ptr<CLFillBorderKernel>> _vertical_border_handler;
- std::vector<std::unique_ptr<CLGaussianPyramidHorKernel>> _horizontal_reduction;
- std::vector<std::unique_ptr<CLGaussianPyramidVertKernel>> _vertical_reduction;
-};
-
-/** Basic function to execute gaussian pyramid with ORB scale factor. This function calls the following OpenCL kernels and functions:
- *
- * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref CLGaussian5x5
- * -# @ref CLScaleKernel
- */
-class CLGaussianPyramidOrb : public CLGaussianPyramid
-{
-public:
- /** Constructor */
- CLGaussianPyramidOrb();
-
- // Inherited methods overridden:
- void configure(ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override;
- void configure(const CLCompileContext &compile_context, ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override;
- void run() override;
-
-private:
- std::vector<CLGaussian5x5> _gauss5x5;
- std::vector<std::unique_ptr<CLScaleKernel>> _scale_nearest;
-};
-}
-#endif /*ARM_COMPUTE_CLGAUSSIANPYRAMID_H */
diff --git a/arm_compute/runtime/CL/functions/CLHOGDescriptor.h b/arm_compute/runtime/CL/functions/CLHOGDescriptor.h
deleted file mode 100644
index 87bcd7f49e..0000000000
--- a/arm_compute/runtime/CL/functions/CLHOGDescriptor.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLHOGDESCRIPTOR_H
-#define ARM_COMPUTE_CLHOGDESCRIPTOR_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/functions/CLHOGGradient.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-
-#include <memory>
-
-namespace arm_compute
-{
-class IHOG;
-class CLHOGOrientationBinningKernel;
-class CLHOGBlockNormalizationKernel;
-/** Basic function to calculate HOG descriptor. This function calls the following OpenCL kernels:
- *
- * -# @ref CLHOGGradient
- * -# @ref CLHOGOrientationBinningKernel
- * -# @ref CLHOGBlockNormalizationKernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class CLHOGDescriptor : public IFunction
-{
-public:
- /** Default constructor */
- CLHOGDescriptor(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Prevent instances of this class from being copied */
- CLHOGDescriptor(const CLHOGDescriptor &) = delete;
- /** Prevent instances of this class from being copied */
- CLHOGDescriptor &operator=(const CLHOGDescriptor &) = delete;
- /** Default destructor */
- ~CLHOGDescriptor();
- /** Initialise the function's source, destination, HOG data-object and border mode
- *
- * @param[in, out] input Input tensor. Data type supported: U8
- * (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Output tensor which stores the HOG descriptor. DataType supported: F32. The number of channels is equal to the number of histogram bins per block
- * @param[in] hog HOG data object which describes the HOG descriptor
- * @param[in] border_mode Border mode to use.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(ICLTensor *input, ICLTensor *output, const IHOG *hog, BorderMode border_mode, uint8_t constant_border_value = 0);
- /** Initialise the function's source, destination, HOG data-object and border mode
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in, out] input Input tensor. Data type supported: U8
- * (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Output tensor which stores the HOG descriptor. DataType supported: F32. The number of channels is equal to the number of histogram bins per block
- * @param[in] hog HOG data object which describes the HOG descriptor
- * @param[in] border_mode Border mode to use.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const IHOG *hog, BorderMode border_mode, uint8_t constant_border_value = 0);
-
- // Inherited method overridden:
- void run() override;
-
-private:
- MemoryGroup _memory_group;
- CLHOGGradient _gradient;
- std::unique_ptr<CLHOGOrientationBinningKernel> _orient_bin;
- std::unique_ptr<CLHOGBlockNormalizationKernel> _block_norm;
- CLTensor _mag;
- CLTensor _phase;
- CLTensor _hog_space;
-};
-}
-
-#endif /* ARM_COMPUTE_CLHOGDESCRIPTOR_H */
diff --git a/arm_compute/runtime/CL/functions/CLHOGDetector.h b/arm_compute/runtime/CL/functions/CLHOGDetector.h
deleted file mode 100644
index 539a521797..0000000000
--- a/arm_compute/runtime/CL/functions/CLHOGDetector.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLHOGDETECTOR_H
-#define ARM_COMPUTE_CLHOGDETECTOR_H
-
-#include "arm_compute/core/CL/ICLArray.h"
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/IHOG.h"
-#include "arm_compute/runtime/IFunction.h"
-
-#include <memory>
-
-namespace arm_compute
-{
-class CLCompileContext;
-class CLHOGDetectorKernel;
-class ICLTensor;
-class ICLHOG;
-
-/** Basic function to execute HOG detector based on linear SVM. This function calls the following OpenCL kernel:
- *
- * -# @ref CLHOGDetectorKernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class CLHOGDetector : public IFunction
-{
-public:
- /** Default constructor */
- CLHOGDetector();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLHOGDetector(const CLHOGDetector &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLHOGDetector &operator=(const CLHOGDetector &) = delete;
- /** Allow instances of this class to be moved */
- CLHOGDetector(CLHOGDetector &&) = default;
- /** Allow instances of this class to be moved */
- CLHOGDetector &operator=(CLHOGDetector &&) = default;
- /** Default destructor */
- ~CLHOGDetector();
- /** Initialise the kernel's input, output, HOG data object, detection window stride, threshold and index class
- *
- * @attention The function does not reset the number of values in @ref IDetectionWindowArray so it is caller's responsibility to clear it.
- *
- * @param[in] input Input tensor. It is the output of @ref CLHOGDescriptor. Data type supported: F32
- * @param[in] hog HOG data-object that describes the HOG descriptor
- * @param[out] detection_windows Array of @ref DetectionWindow used to store the detected objects
- * @param[in] detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions.
- * It must be multiple of the block stride stored in hog
- * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane
- * @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to
- */
- void configure(const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, const Size2D &detection_window_stride, float threshold = 0.0f, size_t idx_class = 0);
- /** Initialise the kernel's input, output, HOG data object, detection window stride, threshold and index class
- *
- * @attention The function does not reset the number of values in @ref IDetectionWindowArray so it is caller's responsibility to clear it.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor. It is the output of @ref CLHOGDescriptor. Data type supported: F32
- * @param[in] hog HOG data-object that describes the HOG descriptor
- * @param[out] detection_windows Array of @ref DetectionWindow used to store the detected objects
- * @param[in] detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions.
- * It must be multiple of the block stride stored in hog
- * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane
- * @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, const Size2D &detection_window_stride,
- float threshold = 0.0f,
- size_t idx_class = 0);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- std::unique_ptr<CLHOGDetectorKernel> _hog_detector_kernel;
- ICLDetectionWindowArray *_detection_windows;
- cl::Buffer _num_detection_windows;
-};
-}
-
-#endif /* ARM_COMPUTE_CLHOGDETECTOR_H */
diff --git a/arm_compute/runtime/CL/functions/CLHOGGradient.h b/arm_compute/runtime/CL/functions/CLHOGGradient.h
deleted file mode 100644
index 569490f333..0000000000
--- a/arm_compute/runtime/CL/functions/CLHOGGradient.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLHOGGRADIENT_H
-#define ARM_COMPUTE_CLHOGGRADIENT_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/functions/CLDerivative.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-
-#include <cstdint>
-#include <memory>
-
-namespace arm_compute
-{
-class CLCompileContext;
-class CLMagnitudePhaseKernel;
-class ITensorInfo;
-/** Basic function to calculate the gradient for HOG. This function calls the following OpenCL kernels:
- *
- * -# @ref CLDerivative
- * -# @ref CLMagnitudePhaseKernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class CLHOGGradient : public IFunction
-{
-public:
- /** Default constructor */
- CLHOGGradient(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Initialise the function's source, destinations, phase type and border mode
- *
- * @param[in, out] input Input tensor. Data type supported: U8.
- * (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output_magnitude Output tensor (magnitude). Data type supported: U16.
- * @param[out] output_phase Output tensor.(phase). Format supported: U8
- * @param[in] phase_type Type of @ref PhaseType
- * @param[in] border_mode Border mode to use
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(ICLTensor *input, ICLTensor *output_magnitude, ICLTensor *output_phase, PhaseType phase_type, BorderMode border_mode, uint8_t constant_border_value = 0);
- /** Initialise the function's source, destinations, phase type and border mode
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in, out] input Input tensor. Data type supported: U8.
- * (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output_magnitude Output tensor (magnitude). Data type supported: U16.
- * @param[out] output_phase Output tensor.(phase). Format supported: U8
- * @param[in] phase_type Type of @ref PhaseType
- * @param[in] border_mode Border mode to use
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output_magnitude, ICLTensor *output_phase, PhaseType phase_type, BorderMode border_mode,
- uint8_t constant_border_value = 0);
-
- // Inherited method overridden:
- void run() override;
-
-private:
- MemoryGroup _memory_group;
- CLDerivative _derivative;
- std::unique_ptr<CLMagnitudePhaseKernel> _mag_phase;
- CLTensor _gx;
- CLTensor _gy;
-};
-}
-#endif /*ARM_COMPUTE_CLHOGGRADIENT_H */
diff --git a/arm_compute/runtime/CL/functions/CLHOGMultiDetection.h b/arm_compute/runtime/CL/functions/CLHOGMultiDetection.h
deleted file mode 100644
index b9a51653f2..0000000000
--- a/arm_compute/runtime/CL/functions/CLHOGMultiDetection.h
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLHOGMULTIDETECTION_H
-#define ARM_COMPUTE_CLHOGMULTIDETECTION_H
-
-#include "arm_compute/core/CL/ICLArray.h"
-#include "arm_compute/core/CL/ICLMultiHOG.h"
-#include "arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/functions/CLHOGDetector.h"
-#include "arm_compute/runtime/CL/functions/CLHOGGradient.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-
-#include <memory>
-
-namespace arm_compute
-{
-class CLCompileContext;
-class CLHOGOrientationBinningKernel;
-class CLHOGBlockNormalizationKernel;
-/** Basic function to detect multiple objects (or the same object at different scales) on the same input image using HOG. This function calls the following kernels:
- *
- * -# @ref CLHOGGradient
- * -# @ref CLHOGOrientationBinningKernel
- * -# @ref CLHOGBlockNormalizationKernel
- * -# @ref CLHOGDetector
- * -# @ref CPPDetectionWindowNonMaximaSuppressionKernel (executed if non_maxima_suppression == true)
- *
- * @note This implementation works if all the HOG data-objects within the IMultiHOG container have the same:
- * -# Phase type
- * -# Normalization type
- * -# L2 hysteresis threshold if the normalization type is L2HYS_NORM
- *
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class CLHOGMultiDetection : public IFunction
-{
-public:
- /** Default constructor */
- CLHOGMultiDetection(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLHOGMultiDetection(const CLHOGMultiDetection &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLHOGMultiDetection &operator=(const CLHOGMultiDetection &) = delete;
- /** Default destructor */
- ~CLHOGMultiDetection();
- /** Initialise the function's source, destination, detection window strides, border mode, threshold and non-maxima suppression
- *
- * @param[in, out] input Input tensor. Data type supported: U8
- * (Written to only for @p border_mode != UNDEFINED)
- * @param[in] multi_hog Container of multiple HOG data object. Each HOG data object describes one HOG model to detect.
- * This container should store the HOG data-objects in descending or ascending cell_size width order.
- * This will help to understand if the HOG descriptor computation can be skipped for some HOG data-objects
- * @param[out] detection_windows Array of @ref DetectionWindow used for locating the detected objects
- * @param[in] detection_window_strides Array of @ref Size2D used to specify the distance in pixels between 2 consecutive detection windows in x and y directions for each HOG data-object
- * The dimension of this array must be the same as multi_hog->num_models()
- * The i-th detection_window_stride of this array must be a multiple of the block_stride stored in the i-th multi_hog array
- * @param[in] border_mode Border mode to use.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane
- * @param[in] non_maxima_suppression (Optional) Flag to specify whether the non-maxima suppression is required or not.
- * True if the non-maxima suppression stage has to be computed
- * @param[in] min_distance (Optional) Radial Euclidean distance to use for the non-maxima suppression stage
- *
- */
- void configure(ICLTensor *input, const ICLMultiHOG *multi_hog, ICLDetectionWindowArray *detection_windows, ICLSize2DArray *detection_window_strides, BorderMode border_mode,
- uint8_t constant_border_value = 0, float threshold = 0.0f, bool non_maxima_suppression = false, float min_distance = 1.0f);
- /** Initialise the function's source, destination, detection window strides, border mode, threshold and non-maxima suppression
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in, out] input Input tensor. Data type supported: U8
- * (Written to only for @p border_mode != UNDEFINED)
- * @param[in] multi_hog Container of multiple HOG data object. Each HOG data object describes one HOG model to detect.
- * This container should store the HOG data-objects in descending or ascending cell_size width order.
- * This will help to understand if the HOG descriptor computation can be skipped for some HOG data-objects
- * @param[out] detection_windows Array of @ref DetectionWindow used for locating the detected objects
- * @param[in] detection_window_strides Array of @ref Size2D used to specify the distance in pixels between 2 consecutive detection windows in x and y directions for each HOG data-object
- * The dimension of this array must be the same as multi_hog->num_models()
- * The i-th detection_window_stride of this array must be a multiple of the block_stride stored in the i-th multi_hog array
- * @param[in] border_mode Border mode to use.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane
- * @param[in] non_maxima_suppression (Optional) Flag to specify whether the non-maxima suppression is required or not.
- * True if the non-maxima suppression stage has to be computed
- * @param[in] min_distance (Optional) Radial Euclidean distance to use for the non-maxima suppression stage
- *
- */
- void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLMultiHOG *multi_hog, ICLDetectionWindowArray *detection_windows, ICLSize2DArray *detection_window_strides,
- BorderMode border_mode, uint8_t constant_border_value = 0, float threshold = 0.0f, bool non_maxima_suppression = false, float min_distance = 1.0f);
-
- // Inherited method overridden:
- void run() override;
-
-private:
- MemoryGroup _memory_group;
- CLHOGGradient _gradient_kernel;
- std::vector<std::unique_ptr<CLHOGOrientationBinningKernel>> _orient_bin_kernel;
- std::vector<std::unique_ptr<CLHOGBlockNormalizationKernel>> _block_norm_kernel;
- std::vector<CLHOGDetector> _hog_detect_kernel;
- CPPDetectionWindowNonMaximaSuppressionKernel _non_maxima_kernel;
- std::vector<CLTensor> _hog_space;
- std::vector<CLTensor> _hog_norm_space;
- ICLDetectionWindowArray *_detection_windows;
- CLTensor _mag;
- CLTensor _phase;
- bool _non_maxima_suppression;
- size_t _num_orient_bin_kernel;
- size_t _num_block_norm_kernel;
- size_t _num_hog_detect_kernel;
-};
-}
-
-#endif /* ARM_COMPUTE_CLHOGMULTIDETECTION_H */
diff --git a/arm_compute/runtime/CL/functions/CLHarrisCorners.h b/arm_compute/runtime/CL/functions/CLHarrisCorners.h
deleted file mode 100644
index 7f4a456eb3..0000000000
--- a/arm_compute/runtime/CL/functions/CLHarrisCorners.h
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Copyright (c) 2016-2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLHARRISCORNERS_H
-#define ARM_COMPUTE_CLHARRISCORNERS_H
-
-#include "arm_compute/core/CL/ICLArray.h"
-#include "arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h"
-#include "arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include <cstdint>
-#include <memory>
-
-namespace arm_compute
-{
-class CLCompileContext;
-class CLHarrisScoreKernel;
-class CLFillBorderKernel;
-class ICLTensor;
-using ICLImage = ICLTensor;
-
-/** Basic function to execute harris corners detection. This function calls the following CL and Neon kernels and functions:
- *
- * @note Requires CPU support for the kernels: CPPCornerCandidatesKernel and CPPSortEuclideanDistanceKernel.
- *
- * -# @ref CLSobel3x3 (if gradient_size == 3) or<br/>
- * @ref CLSobel5x5 (if gradient_size == 5) or<br/>
- * @ref CLSobel7x7 (if gradient_size == 7)
- * -# @ref CLFillBorderKernel
- * -# @ref CLHarrisScoreKernel
- * -# @ref CLNonMaximaSuppression3x3
- * -# @ref CPPCornerCandidatesKernel
- * -# @ref CPPSortEuclideanDistanceKernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class CLHarrisCorners : public IFunction
-{
-public:
- /** Constructor */
- CLHarrisCorners(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLHarrisCorners(const CLHarrisCorners &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- const CLHarrisCorners &operator=(const CLHarrisCorners &) = delete;
- /** Default destructor */
- ~CLHarrisCorners();
- /** Initialize the function's source, destination, conv and border_mode.
- *
- * @param[in,out] input Source image. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[in] threshold Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel).
- * @param[in] min_dist Radial Euclidean distance for the euclidean distance stage.
- * @param[in] sensitivity Sensitivity threshold k from the Harris-Stephens equation
- * @param[in] gradient_size The gradient window size to use on the input. The implementation supports 3, 5, and 7
- * @param[in] block_size The block window size used to compute the Harris Corner score. The implementation supports 3, 5, and 7.
- * @param[out] corners Array of keypoints to store the results.
- * @param[in] border_mode Border mode to use
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- * @param[in] use_fp16 (Optional) If true the FP16 kernels will be used. If false F32 kernels are used.
- */
- void configure(ICLImage *input, float threshold, float min_dist, float sensitivity,
- int32_t gradient_size, int32_t block_size, ICLKeyPointArray *corners,
- BorderMode border_mode, uint8_t constant_border_value = 0, bool use_fp16 = false);
- /** Initialize the function's source, destination, conv and border_mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in,out] input Source image. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[in] threshold Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel).
- * @param[in] min_dist Radial Euclidean distance for the euclidean distance stage.
- * @param[in] sensitivity Sensitivity threshold k from the Harris-Stephens equation
- * @param[in] gradient_size The gradient window size to use on the input. The implementation supports 3, 5, and 7
- * @param[in] block_size The block window size used to compute the Harris Corner score. The implementation supports 3, 5, and 7.
- * @param[out] corners Array of keypoints to store the results.
- * @param[in] border_mode Border mode to use
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- * @param[in] use_fp16 (Optional) If true the FP16 kernels will be used. If false F32 kernels are used.
- */
- void configure(const CLCompileContext &compile_context, ICLImage *input, float threshold, float min_dist, float sensitivity,
- int32_t gradient_size, int32_t block_size, ICLKeyPointArray *corners,
- BorderMode border_mode, uint8_t constant_border_value = 0, bool use_fp16 = false);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- MemoryGroup _memory_group; /**< Function's memory group */
- std::unique_ptr<IFunction> _sobel; /**< Sobel function */
- std::unique_ptr<CLHarrisScoreKernel> _harris_score; /**< Harris score kernel */
- CLNonMaximaSuppression3x3 _non_max_suppr; /**< Non-maxima suppression function */
- CPPCornerCandidatesKernel _candidates; /**< Sort kernel */
- CPPSortEuclideanDistanceKernel _sort_euclidean; /**< Euclidean distance kernel */
- std::unique_ptr<CLFillBorderKernel> _border_gx; /**< Border handler before running harris score */
- std::unique_ptr<CLFillBorderKernel> _border_gy; /**< Border handler before running harris score */
- CLImage _gx; /**< Source image - Gx component */
- CLImage _gy; /**< Source image - Gy component */
- CLImage _score; /**< Source image - Harris score */
- CLImage _nonmax; /**< Source image - Non-Maxima suppressed image */
- std::vector<InternalKeypoint> _corners_list; /**< Array of InternalKeypoint. It stores the potential corner candidates */
- int32_t _num_corner_candidates; /**< Number of potential corner candidates */
- ICLKeyPointArray *_corners; /**< Output corners array */
-};
-}
-#endif /*ARM_COMPUTE_CLHARRISCORNERS_H */
diff --git a/arm_compute/runtime/CL/functions/CLHistogram.h b/arm_compute/runtime/CL/functions/CLHistogram.h
deleted file mode 100644
index b45a79e10e..0000000000
--- a/arm_compute/runtime/CL/functions/CLHistogram.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLHISTOGRAM_H
-#define ARM_COMPUTE_CLHISTOGRAM_H
-
-#include "arm_compute/runtime/IFunction.h"
-#include "src/core/CL/kernels/CLHistogramKernel.h"
-
-namespace arm_compute
-{
-class ICLDistribution1D;
-class ICLTensor;
-
-/** Basic function to execute histogram. This function calls the following OpenCL kernels:
- *
- * -# @ref CLHistogramKernel
- * -# @ref CLHistogramBorderKernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class CLHistogram : public IFunction
-{
-public:
- /**
- * Default constructor
- */
- CLHistogram();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLHistogram(const CLHistogram &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- const CLHistogram &operator=(const CLHistogram &) = delete;
- /** Initialize the function
- *
- * @param[in] input Source image. Data types supported: U8
- * @param[out] output Output distribution.
- */
- void configure(const ICLImage *input, ICLDistribution1D *output);
- /** Initialize the function
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source image. Data types supported: U8
- * @param[out] output Output distribution.
- */
- void configure(const CLCompileContext &compile_context, const ICLImage *input, ICLDistribution1D *output);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- CLHistogramKernel _kernel; /**< kernel to run */
- CLHistogramBorderKernel _kernel_border; /**< Border kernel to run */
-};
-}
-#endif /*ARM_COMPUTE_CLHISTOGRAM_H */
diff --git a/arm_compute/runtime/CL/functions/CLIntegralImage.h b/arm_compute/runtime/CL/functions/CLIntegralImage.h
deleted file mode 100644
index b6c98dc9ab..0000000000
--- a/arm_compute/runtime/CL/functions/CLIntegralImage.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLINTEGRALIMAGE_H
-#define ARM_COMPUTE_CLINTEGRALIMAGE_H
-
-#include "arm_compute/runtime/IFunction.h"
-
-#include <memory>
-
-namespace arm_compute
-{
-class CLCompileContext;
-class CLIntegralImageHorKernel;
-class CLIntegralImageVertKernel;
-class ICLTensor;
-
-/** Basic function to execute integral image. This function calls the following OpenCL kernels:
- *
- * -# @ref CLIntegralImageHorKernel
- * -# @ref CLIntegralImageVertKernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class CLIntegralImage : public IFunction
-{
-public:
- /** Default Constructor. */
- CLIntegralImage();
- /** Prevent instances of this class from being copied */
- CLIntegralImage(const CLIntegralImage &) = delete;
- /** Prevent instances of this class from being copied */
- CLIntegralImage &operator=(const CLIntegralImage &) = delete;
- /** Default destructor */
- ~CLIntegralImage();
- /** Initialise the function's source and destination.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor, Data types supported: U32.
- */
- void configure(const ICLTensor *input, ICLTensor *output);
- /** Initialise the function's source and destination.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor, Data types supported: U32.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
-
- // Inherited methods overridden:
- void run() override;
-
-protected:
- std::unique_ptr<CLIntegralImageHorKernel> _integral_hor; /**< Integral Image Horizontal kernel */
- std::unique_ptr<CLIntegralImageVertKernel> _integral_vert; /**< Integral Image Vertical kernel */
-};
-}
-#endif /*ARM_COMPUTE_CLINTEGRALIMAGE_H */
diff --git a/arm_compute/runtime/CL/functions/CLLaplacianPyramid.h b/arm_compute/runtime/CL/functions/CLLaplacianPyramid.h
deleted file mode 100644
index 875b714edd..0000000000
--- a/arm_compute/runtime/CL/functions/CLLaplacianPyramid.h
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLLAPLACIANPYRAMID_H
-#define ARM_COMPUTE_CLLAPLACIANPYRAMID_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLPyramid.h"
-#include "arm_compute/runtime/CL/functions/CLDepthConvertLayer.h"
-#include "arm_compute/runtime/CL/functions/CLElementwiseOperations.h"
-#include "arm_compute/runtime/CL/functions/CLGaussian5x5.h"
-#include "arm_compute/runtime/CL/functions/CLGaussianPyramid.h"
-#include "arm_compute/runtime/IFunction.h"
-
-#include <cstddef>
-#include <cstdint>
-#include <memory>
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Basic function to execute laplacian pyramid. This function calls the following OpenCL kernels and functions:
- *
- * -# @ref CLGaussianPyramidHalf
- * -# @ref CLGaussian5x5
- * -# @ref CLArithmeticSubtraction
- *
- * First a Gaussian pyramid is created. Then, for each level i, the corresponding tensor I(i) is blurred with the Gaussian 5x5 filter, and the
- * difference between the two tensors is the corresponding level L(i) of the Laplacian pyramid:
- * L(i) = I(i) - Gaussian5x5(I(i))
- * Level 0 always has the same first two dimensions as the input tensor.
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
-*/
-class CLLaplacianPyramid : public IFunction
-{
-public:
- /** Constructor */
- CLLaplacianPyramid();
- /** Initialise the function's source, destinations and border mode.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] pyramid Destination pyramid tensors, Data types supported at each level: S16.
- * @param[out] output The lowest resolution tensor necessary to reconstruct the input tensor from the pyramid. Data types supported: S16.
- * The first two dimensions of this tensor must match the first two dimensions of the tensor in the last level of the pyramid, that is:
- * output.width() = input.width() / pow(2, pyramid_levels-1) and output.height() = input.height() / pow(2, pyramid_levels-1)
- * @param[in] border_mode Border mode to use.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- *
- */
- void configure(ICLTensor *input, CLPyramid *pyramid, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value);
- /** Initialise the function's source, destinations and border mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] pyramid Destination pyramid tensors, Data types supported at each level: S16.
- * @param[out] output The lowest resolution tensor necessary to reconstruct the input tensor from the pyramid. Data types supported: S16.
- * The first two dimensions of this tensor must match the first two dimensions of the tensor in the last level of the pyramid, that is:
- * output.width() = input.width() / pow(2, pyramid_levels-1) and output.height() = input.height() / pow(2, pyramid_levels-1)
- * @param[in] border_mode Border mode to use.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- *
- */
- void configure(const CLCompileContext &compile_context, ICLTensor *input, CLPyramid *pyramid, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- size_t _num_levels;
- CLGaussianPyramidHalf _gaussian_pyr_function;
- std::vector<CLGaussian5x5> _convf;
- std::vector<CLArithmeticSubtraction> _subf;
- CLDepthConvertLayer _depth_function;
- CLPyramid _gauss_pyr;
- CLPyramid _conv_pyr;
-};
-}
-#endif /*ARM_COMPUTE_CLLAPLACIANPYRAMID_H */
diff --git a/arm_compute/runtime/CL/functions/CLLaplacianReconstruct.h b/arm_compute/runtime/CL/functions/CLLaplacianReconstruct.h
deleted file mode 100644
index c780b56dd8..0000000000
--- a/arm_compute/runtime/CL/functions/CLLaplacianReconstruct.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLLAPLACIANRECONSTRUCT_H
-#define ARM_COMPUTE_CLLAPLACIANRECONSTRUCT_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLPyramid.h"
-#include "arm_compute/runtime/CL/functions/CLDepthConvertLayer.h"
-#include "arm_compute/runtime/CL/functions/CLElementwiseOperations.h"
-#include "arm_compute/runtime/CL/functions/CLScale.h"
-#include "arm_compute/runtime/IFunction.h"
-
-#include <cstdint>
-#include <memory>
-
-namespace arm_compute
-{
-class ICLTensor;
-using ICLImage = ICLTensor;
-
-/** Basic function to execute laplacian reconstruction. This function calls the following OpenCL kernels and functions:
- *
- * -# @ref CLArithmeticAddition
- * -# @ref CLScale
- * -# @ref CLDepthConvertLayer
- *
- * This function reconstructs the original image from a Laplacian Image Pyramid.
- *
- * The input image is added to the last level of the Laplacian pyramid L(n-1), and the resulting image is upsampled to the
- * resolution of the next pyramid level:
- *
- * I(n-2) = upsample(input + L(n-1))
- *
- * For each pyramid level i, except i=0 and i=n-1:
- * I(i-1) = upsample(I(i) + L(i))
- *
- * output = I(0) + L(0)
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
-*/
-class CLLaplacianReconstruct : public IFunction
-{
-public:
- /** Constructor */
- CLLaplacianReconstruct();
- /** Initialise the function's source, destinations and border mode.
- *
- * The Output image must have the same size as the first level of the pyramid.
- * The Input image must have the same size as the last level of the pyramid.
- *
- * The idea is to reconstruct the original hi-res image from a low-res representation of it and the laplacian pyramid.
- *
- * @param[in] pyramid Laplacian pyramid tensors, Data types supported at each level: S16.
- * @param[in] input Source tensor. Data types supported: S16.
- * @param[out] output Output tensor. Data types supported: U8.
- * @param[in] border_mode Border mode to use for the convolution.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- *
- */
- void configure(const CLPyramid *pyramid, ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value);
- /** Initialise the function's source, destinations and border mode.
- *
- * The Output image must have the same size as the first level of the pyramid.
- * The Input image must have the same size as the last level of the pyramid.
- *
- * The idea is to reconstruct the original hi-res image from a low-res representation of it and the laplacian pyramid.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] pyramid Laplacian pyramid tensors, Data types supported at each level: S16.
- * @param[in] input Source tensor. Data types supported: S16.
- * @param[out] output Output tensor. Data types supported: U8.
- * @param[in] border_mode Border mode to use for the convolution.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- *
- */
- void configure(const CLCompileContext &compile_context, const CLPyramid *pyramid, ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- CLPyramid _tmp_pyr;
- std::vector<CLArithmeticAddition> _addf;
- std::vector<CLScale> _scalef;
- CLDepthConvertLayer _depthf;
-};
-}
-#endif /*ARM_COMPUTE_CLLAPLACIANRECONSTRUCT_H */
diff --git a/arm_compute/runtime/CL/functions/CLMagnitude.h b/arm_compute/runtime/CL/functions/CLMagnitude.h
deleted file mode 100644
index 4ed1414613..0000000000
--- a/arm_compute/runtime/CL/functions/CLMagnitude.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLMAGNITUDE_H
-#define ARM_COMPUTE_CLMAGNITUDE_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class CLCompileContext;
-class ICLTensor;
-
-/** Basic function to run @ref CLMagnitudePhaseKernel.
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
-*/
-class CLMagnitude : public ICLSimpleFunction
-{
-public:
- /** Initialise the kernel's inputs.
- *
- * @param[in] input1 First tensor input. Data types supported: S16.
- * @param[in] input2 Second tensor input. Data types supported: S16.
- * @param[out] output Output tensor. Data types supported: S16.
- * @param[in] mag_type (Optional) Magnitude calculation type. Default: L2NORM.
- */
- void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, MagnitudeType mag_type = MagnitudeType::L2NORM);
- /** Initialise the kernel's inputs.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input1 First tensor input. Data types supported: S16.
- * @param[in] input2 Second tensor input. Data types supported: S16.
- * @param[out] output Output tensor. Data types supported: S16.
- * @param[in] mag_type (Optional) Magnitude calculation type. Default: L2NORM.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, MagnitudeType mag_type = MagnitudeType::L2NORM);
-};
-}
-#endif /*ARM_COMPUTE_CLMAGNITUDE_H */
diff --git a/arm_compute/runtime/CL/functions/CLMeanStdDev.h b/arm_compute/runtime/CL/functions/CLMeanStdDev.h
deleted file mode 100644
index d9ced1393e..0000000000
--- a/arm_compute/runtime/CL/functions/CLMeanStdDev.h
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLMEANSTDDEV_H
-#define ARM_COMPUTE_CLMEANSTDDEV_H
-
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/runtime/CL/functions/CLReductionOperation.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-
-#include <memory>
-
-namespace arm_compute
-{
-class CLCompileContext;
-class ICLTensor;
-class ITensorInfo;
-class CLFillBorderKernel;
-class CLMeanStdDevKernel;
-/** Basic function to execute mean and standard deviation by calling @ref CLMeanStdDevKernel */
-class CLMeanStdDev : public IFunction
-{
-public:
- /** Default Constructor. */
- CLMeanStdDev(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLMeanStdDev(const CLMeanStdDev &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLMeanStdDev &operator=(const CLMeanStdDev &) = delete;
- /** Allow instances of this class to be moved */
- CLMeanStdDev(CLMeanStdDev &&) = default;
- /** Allow instances of this class to be moved */
- CLMeanStdDev &operator=(CLMeanStdDev &&) = default;
- /** Default destructor */
- ~CLMeanStdDev();
- /** Initialise the kernel's inputs and outputs.
- *
- * @param[in, out] input Input image. Data types supported: U8/F16/F32. (Written to only for border filling)
- * @param[out] mean Output average pixel value.
- * @param[out] stddev (Optional) Output standard deviation of pixel values.
- */
- void configure(ICLImage *input, float *mean, float *stddev = nullptr);
- /** Initialise the kernel's inputs and outputs.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in, out] input Input image. Data types supported: U8/F16/F32. (Written to only for border filling)
- * @param[out] mean Output average pixel value.
- * @param[out] stddev (Optional) Output standard deviation of pixel values.
- */
- void configure(const CLCompileContext &compile_context, ICLImage *input, float *mean, float *stddev = nullptr);
- /** Static function to check if given info will lead to a valid configuration of @ref CLMeanStdDev
- *
- * @param[in] input Input image. Data types supported: U8/F16/F32.
- * @param[in] mean Output average pixel value.
- * @param[in] stddev (Optional) Output standard deviation of pixel values.
- *
- * @return a status
- */
- static Status validate(ITensorInfo *input, float *mean, float *stddev = nullptr);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- template <typename T>
- void run_float();
- void run_int();
-
- MemoryGroup _memory_group; /**< Function's memory group */
- DataType _data_type; /**< Input data type. */
- unsigned int _num_pixels; /**< Number of image's pixels. */
- bool _run_stddev; /**< Flag for knowing if we should run stddev reduction function. */
- CLReductionOperation _reduction_operation_mean; /**< Reduction operation function for computing mean value. */
- CLReductionOperation _reduction_operation_stddev; /**< Reduction operation function for computing standard deviation. */
- CLTensor _reduction_output_mean; /**< Reduction operation output tensor for mean value. */
- CLTensor _reduction_output_stddev; /**< Reduction operation output tensor for standard deviation value. */
- float *_mean; /**< Pointer that holds the mean value. */
- float *_stddev; /**< Pointer that holds the standard deviation value. */
- std::unique_ptr<CLMeanStdDevKernel> _mean_stddev_kernel; /**< Kernel that standard deviation calculation. */
- std::unique_ptr<CLFillBorderKernel> _fill_border_kernel; /**< Kernel that fills the border with zeroes. */
- cl::Buffer _global_sum; /**< Variable that holds the global sum among calls in order to ease reduction */
- cl::Buffer _global_sum_squared; /**< Variable that holds the global sum of squared values among calls in order to ease reduction */
-};
-}
-#endif /*ARM_COMPUTE_CLMEANSTDDEV_H */
diff --git a/arm_compute/runtime/CL/functions/CLMedian3x3.h b/arm_compute/runtime/CL/functions/CLMedian3x3.h
deleted file mode 100644
index 1fe318e851..0000000000
--- a/arm_compute/runtime/CL/functions/CLMedian3x3.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLMEDIAN3X3_H
-#define ARM_COMPUTE_CLMEDIAN3X3_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class CLCompileContext;
-class ICLTensor;
-
-/** Basic function to execute median filter. This function calls the following OpenCL kernels:
- *
- * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref CLMedian3x3Kernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class CLMedian3x3 : public ICLSimpleFunction
-{
-public:
- /** Initialise the function's source, destinations and border mode.
- *
- * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination tensor, Data types supported: U8.
- * @param[in] border_mode Border mode to use for the convolution.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
- /** Initialise the function's source, destinations and border mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination tensor, Data types supported: U8.
- * @param[in] border_mode Border mode to use for the convolution.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
-};
-}
-#endif /*ARM_COMPUTE_CLMEDIAN3X3_H */
diff --git a/arm_compute/runtime/CL/functions/CLMinMaxLocation.h b/arm_compute/runtime/CL/functions/CLMinMaxLocation.h
deleted file mode 100644
index 77c381f64d..0000000000
--- a/arm_compute/runtime/CL/functions/CLMinMaxLocation.h
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLMINMAXLOCATION_H
-#define ARM_COMPUTE_CLMINMAXLOCATION_H
-
-#include "arm_compute/runtime/CL/CLArray.h"
-#include "arm_compute/runtime/IFunction.h"
-
-#include <memory>
-
-namespace arm_compute
-{
-class CLCompileContext;
-class CLMinMaxKernel;
-class CLMinMaxLocationKernel;
-class ICLTensor;
-using ICLImage = ICLTensor;
-
-/** Basic function to execute min and max location. This function calls the following OpenCL kernels:
- *
- * -# @ref CLMinMaxKernel
- * -# @ref CLMinMaxLocationKernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class CLMinMaxLocation : public IFunction
-{
-public:
- /** Constructor */
- CLMinMaxLocation();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLMinMaxLocation(const CLMinMaxLocation &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLMinMaxLocation &operator=(const CLMinMaxLocation &) = delete;
- /** Allow instances of this class to be moved */
- CLMinMaxLocation(CLMinMaxLocation &&) = default;
- /** Allow instances of this class to be moved */
- CLMinMaxLocation &operator=(CLMinMaxLocation &&) = default;
- /** Default destructor */
- ~CLMinMaxLocation();
- /** Initialise the kernel's inputs and outputs.
- *
- * @note When locations of min and max occurrences are requested, the reported number of locations is limited to the given array size.
- *
- * @param[in] input Input image. Data types supported: U8/S16/F32.
- * @param[out] min Minimum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32.
- * @param[out] max Maximum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32.
- * @param[out] min_loc (Optional) Array of Coordinates2D used to store minimum value locations.
- * @param[out] max_loc (Optional) Array of Coordinates2D used to store maximum value locations.
- * @param[out] min_count (Optional) Number of minimum value encounters.
- * @param[out] max_count (Optional) Number of maximum value encounters.
- */
- void configure(const ICLImage *input, void *min, void *max,
- CLCoordinates2DArray *min_loc = nullptr, CLCoordinates2DArray *max_loc = nullptr,
- uint32_t *min_count = nullptr, uint32_t *max_count = nullptr);
- /** Initialise the kernel's inputs and outputs.
- *
- * @note When locations of min and max occurrences are requested, the reported number of locations is limited to the given array size.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input image. Data types supported: U8/S16/F32.
- * @param[out] min Minimum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32.
- * @param[out] max Maximum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32.
- * @param[out] min_loc (Optional) Array of Coordinates2D used to store minimum value locations.
- * @param[out] max_loc (Optional) Array of Coordinates2D used to store maximum value locations.
- * @param[out] min_count (Optional) Number of minimum value encounters.
- * @param[out] max_count (Optional) Number of maximum value encounters.
- */
- void configure(const CLCompileContext &compile_context, const ICLImage *input, void *min, void *max,
- CLCoordinates2DArray *min_loc = nullptr, CLCoordinates2DArray *max_loc = nullptr,
- uint32_t *min_count = nullptr, uint32_t *max_count = nullptr);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- std::unique_ptr<CLMinMaxKernel> _min_max_kernel; /**< Kernel that performs min/max */
- std::unique_ptr<CLMinMaxLocationKernel> _min_max_loc_kernel; /**< Kernel that counts min/max occurrences and identifies their positions */
- cl::Buffer _min_max_vals; /**< Buffer to collect min, max values */
- cl::Buffer _min_max_count_vals; /**< Buffer to collect min, max values */
- void *_min; /**< Minimum value. */
- void *_max; /**< Maximum value. */
- uint32_t *_min_count; /**< Minimum value occurrences. */
- uint32_t *_max_count; /**< Maximum value occurrences. */
- CLCoordinates2DArray *_min_loc; /**< Minimum value occurrences coordinates. */
- CLCoordinates2DArray *_max_loc; /**< Maximum value occurrences coordinates. */
-};
-}
-#endif /*ARM_COMPUTE_CLMINMAXLOCATION_H */
diff --git a/arm_compute/runtime/CL/functions/CLNonLinearFilter.h b/arm_compute/runtime/CL/functions/CLNonLinearFilter.h
deleted file mode 100644
index 3d0947db05..0000000000
--- a/arm_compute/runtime/CL/functions/CLNonLinearFilter.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLNONLINEARFILTER_H
-#define ARM_COMPUTE_CLNONLINEARFILTER_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class CLCompileContext;
-class ICLTensor;
-
-/** Basic function to execute non linear filter. This function calls the following OpenCL kernels:
- *
- * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref CLNonLinearFilterKernel
- *
- * @note Supported mask dimensions squares of sizes 3, 5
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class CLNonLinearFilter : public ICLSimpleFunction
-{
-public:
- /** Initialize the function's source, destination, conv and border_mode.
- *
- * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination tensor. Data types supported: U8
- * @param[in] function Non linear function to perform
- * @param[in] mask_size Mask size. Supported sizes: 3, 5
- * @param[in] pattern Mask pattern
- * @param[in] mask The given mask. Will be used only if pattern is specified to PATTERN_OTHER
- * @param[in] border_mode Strategy to use for borders.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask,
- BorderMode border_mode, uint8_t constant_border_value = 0);
- /** Initialize the function's source, destination, conv and border_mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination tensor. Data types supported: U8
- * @param[in] function Non linear function to perform
- * @param[in] mask_size Mask size. Supported sizes: 3, 5
- * @param[in] pattern Mask pattern
- * @param[in] mask The given mask. Will be used only if pattern is specified to PATTERN_OTHER
- * @param[in] border_mode Strategy to use for borders.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask,
- BorderMode border_mode, uint8_t constant_border_value = 0);
-};
-}
-#endif /*ARM_COMPUTE_CLNONLINEARFILTER_H */
diff --git a/arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h b/arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h
deleted file mode 100644
index 60dad42814..0000000000
--- a/arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLNONMAXIMASUPPRESSION3X3_H
-#define ARM_COMPUTE_CLNONMAXIMASUPPRESSION3X3_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class CLCompileContext;
-class ICLTensor;
-
-/** Basic function to execute non-maxima suppression over a 3x3 window. This function calls the following CL kernels:
- *
- * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref CLNonMaximaSuppression3x3Kernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class CLNonMaximaSuppression3x3 : public ICLSimpleFunction
-{
-public:
- /** Initialise the function's source, destinations and border mode.
- *
- * @note The implementation supports just 2 border modes: UNDEFINED and CONSTANT
- * The constant values used with CONSTANT border mode is 0
- *
- * @param[in,out] input Source tensor. Data types supported: U8, F32. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination for the Non-Maxima suppressions 3x3. Data types supported: same as @p input.
- * @param[in] border_mode Border mode to use for non-maxima suppression.
- * The implementation supports just 2 border modes: UNDEFINED and CONSTANT
- */
- void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode);
- /** Initialise the function's source, destinations and border mode.
- *
- * @note The implementation supports just 2 border modes: UNDEFINED and CONSTANT
- * The constant values used with CONSTANT border mode is 0
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in,out] input Source tensor. Data types supported: U8, F32. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination for the Non-Maxima suppressions 3x3. Data types supported: same as @p input.
- * @param[in] border_mode Border mode to use for non-maxima suppression.
- * The implementation supports just 2 border modes: UNDEFINED and CONSTANT
- */
- void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, BorderMode border_mode);
-};
-}
-#endif /* ARM_COMPUTE_CLNONMAXIMASUPPRESSION3X3_H */
diff --git a/arm_compute/runtime/CL/functions/CLOpticalFlow.h b/arm_compute/runtime/CL/functions/CLOpticalFlow.h
deleted file mode 100644
index 5c555f5709..0000000000
--- a/arm_compute/runtime/CL/functions/CLOpticalFlow.h
+++ /dev/null
@@ -1,146 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLOPTICALFLOW_H
-#define ARM_COMPUTE_CLOPTICALFLOW_H
-
-#include "arm_compute/core/IArray.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLArray.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/functions/CLScharr3x3.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-
-#include <cstddef>
-#include <cstdint>
-#include <memory>
-
-namespace arm_compute
-{
-class CLCompileContext;
-class CLPyramid;
-class CLLKTrackerInitKernel;
-class CLLKTrackerStage0Kernel;
-class CLLKTrackerStage1Kernel;
-class CLLKTrackerFinalizeKernel;
-
-/** OpenCL Array of Internal Keypoints */
-using CLLKInternalKeypointArray = CLArray<CLLKInternalKeypoint>;
-/** OpenCL Array of Coefficient Tables */
-using CLCoefficientTableArray = CLArray<CLCoefficientTable>;
-/** OpenCL Array of Old Values */
-using CLOldValueArray = CLArray<CLOldValue>;
-
-/** Basic function to execute optical flow. This function calls the following OpenCL kernels and functions:
- *
- * -# @ref CLScharr3x3
- * -# @ref CLLKTrackerInitKernel
- * -# @ref CLLKTrackerStage0Kernel
- * -# @ref CLLKTrackerStage1Kernel
- * -# @ref CLLKTrackerFinalizeKernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class CLOpticalFlow : public IFunction
-{
-public:
- /** Default constructor */
- CLOpticalFlow(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLOpticalFlow(const CLOpticalFlow &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLOpticalFlow &operator=(const CLOpticalFlow &) = delete;
- /** Allow instances of this class to be moved */
- CLOpticalFlow(CLOpticalFlow &&) = default;
- /** Allow instances of this class to be moved */
- CLOpticalFlow &operator=(CLOpticalFlow &&) = default;
- /** Default destructor */
- ~CLOpticalFlow();
- /** Initialise the function input and output
- *
- * @param[in] old_pyramid Pointer to the pyramid for the old tensor. Data types supported U8
- * @param[in] new_pyramid Pointer to the pyramid for the new tensor. Data types supported U8
- * @param[in] old_points Pointer to the IKeyPointArray storing old key points
- * @param[in] new_points_estimates Pointer to the IKeyPointArray storing new estimates key points
- * @param[out] new_points Pointer to the IKeyPointArray storing new key points
- * @param[in] termination The criteria to terminate the search of each keypoint.
- * @param[in] epsilon The error for terminating the algorithm
- * @param[in] num_iterations The maximum number of iterations before terminate the alogrithm
- * @param[in] window_dimension The size of the window on which to perform the algorithm
- * @param[in] use_initial_estimate The flag to indicate whether the initial estimated position should be used
- * @param[in] border_mode The border mode applied at scharr kernel stage
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT
- *
- */
- void configure(const CLPyramid *old_pyramid, const CLPyramid *new_pyramid,
- const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates, ICLKeyPointArray *new_points,
- Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, bool use_initial_estimate,
- BorderMode border_mode, uint8_t constant_border_value = 0);
- /** Initialise the function input and output
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] old_pyramid Pointer to the pyramid for the old tensor. Data types supported U8
- * @param[in] new_pyramid Pointer to the pyramid for the new tensor. Data types supported U8
- * @param[in] old_points Pointer to the IKeyPointArray storing old key points
- * @param[in] new_points_estimates Pointer to the IKeyPointArray storing new estimates key points
- * @param[out] new_points Pointer to the IKeyPointArray storing new key points
- * @param[in] termination The criteria to terminate the search of each keypoint.
- * @param[in] epsilon The error for terminating the algorithm
- * @param[in] num_iterations The maximum number of iterations before terminate the alogrithm
- * @param[in] window_dimension The size of the window on which to perform the algorithm
- * @param[in] use_initial_estimate The flag to indicate whether the initial estimated position should be used
- * @param[in] border_mode The border mode applied at scharr kernel stage
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT
- *
- */
- void configure(const CLCompileContext &compile_context, const CLPyramid *old_pyramid, const CLPyramid *new_pyramid,
- const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates, ICLKeyPointArray *new_points,
- Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, bool use_initial_estimate,
- BorderMode border_mode, uint8_t constant_border_value = 0);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- MemoryGroup _memory_group;
- std::vector<std::unique_ptr<CLLKTrackerInitKernel>> _tracker_init_kernel;
- std::vector<std::unique_ptr<CLLKTrackerStage0Kernel>> _tracker_stage0_kernel;
- std::vector<std::unique_ptr<CLLKTrackerStage1Kernel>> _tracker_stage1_kernel;
- std::unique_ptr<CLLKTrackerFinalizeKernel> _tracker_finalize_kernel;
- std::vector<CLScharr3x3> _func_scharr;
- std::vector<CLTensor> _scharr_gx;
- std::vector<CLTensor> _scharr_gy;
- const ICLKeyPointArray *_old_points;
- const ICLKeyPointArray *_new_points_estimates;
- ICLKeyPointArray *_new_points;
- std::unique_ptr<CLLKInternalKeypointArray> _old_points_internal;
- std::unique_ptr<CLLKInternalKeypointArray> _new_points_internal;
- std::unique_ptr<CLCoefficientTableArray> _coefficient_table;
- std::unique_ptr<CLOldValueArray> _old_values;
- size_t _num_levels;
-};
-}
-#endif /*ARM_COMPUTE_CLOPTICALFLOW_H */
diff --git a/arm_compute/runtime/CL/functions/CLPhase.h b/arm_compute/runtime/CL/functions/CLPhase.h
deleted file mode 100644
index 7c76c234fe..0000000000
--- a/arm_compute/runtime/CL/functions/CLPhase.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLPHASE_H
-#define ARM_COMPUTE_CLPHASE_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class CLCompileContext;
-class ICLTensor;
-
-/** Basic function to execute an @ref CLMagnitudePhaseKernel.
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
-*/
-class CLPhase : public ICLSimpleFunction
-{
-public:
- /** Initialise the kernel's inputs, output.
- *
- * @param[in] input1 First tensor input. Data types supported: S16.
- * @param[in] input2 Second tensor input. Data types supported: S16.
- * @param[out] output Output tensor. Data types supported: U8.
- * @param[in] phase_type (Optional) Phase calculation type. Default: SIGNED.
- */
- void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, PhaseType phase_type = PhaseType::SIGNED);
- /** Initialise the kernel's inputs, output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input1 First tensor input. Data types supported: S16.
- * @param[in] input2 Second tensor input. Data types supported: S16.
- * @param[out] output Output tensor. Data types supported: U8.
- * @param[in] phase_type (Optional) Phase calculation type. Default: SIGNED.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, PhaseType phase_type = PhaseType::SIGNED);
-};
-}
-#endif /*ARM_COMPUTE_CLPHASE_H */
diff --git a/arm_compute/runtime/CL/functions/CLScharr3x3.h b/arm_compute/runtime/CL/functions/CLScharr3x3.h
deleted file mode 100644
index 4c747af19e..0000000000
--- a/arm_compute/runtime/CL/functions/CLScharr3x3.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLSCHARR3X3_H
-#define ARM_COMPUTE_CLSCHARR3X3_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class CLCompileContext;
-class ICLTensor;
-
-/** Basic function to execute scharr 3x3 filter. This function calls the following OpenCL kernels:
- *
- * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref CLScharr3x3Kernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class CLScharr3x3 : public ICLSimpleFunction
-{
-public:
- /** Initialise the function's source, destinations and border mode.
- *
- * @note At least one of output_x or output_y must be not NULL.
- *
- * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output_x (optional) Destination for the Scharr 3x3 convolution along the X axis. Data types supported: S16.
- * @param[out] output_y (optional) Destination for the Scharr 3x3 convolution along the Y axis. Data types supported: S16.
- * @param[in] border_mode Border mode to use for the convolution.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
- /** Initialise the function's source, destinations and border mode.
- *
- * @note At least one of output_x or output_y must be not NULL.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output_x (optional) Destination for the Scharr 3x3 convolution along the X axis. Data types supported: S16.
- * @param[out] output_y (optional) Destination for the Scharr 3x3 convolution along the Y axis. Data types supported: S16.
- * @param[in] border_mode Border mode to use for the convolution.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
-};
-}
-#endif /*ARM_COMPUTE_CLSCHARR3X3_H */
diff --git a/arm_compute/runtime/CL/functions/CLSobel3x3.h b/arm_compute/runtime/CL/functions/CLSobel3x3.h
deleted file mode 100644
index 1e5745374e..0000000000
--- a/arm_compute/runtime/CL/functions/CLSobel3x3.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLSOBEL3X3_H
-#define ARM_COMPUTE_CLSOBEL3X3_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class CLCompileContext;
-class ICLTensor;
-
-/** Basic function to execute sobel 3x3 filter. This function calls the following OpenCL kernels:
- *
- * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref CLSobel3x3Kernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class CLSobel3x3 : public ICLSimpleFunction
-{
-public:
- /** Default Constructor */
- CLSobel3x3() = default;
- /** Prevent instances of this class from being copied */
- CLSobel3x3(const CLSobel3x3 &) = delete;
- /** Prevent instances of this class from being copied */
- CLSobel3x3 &operator=(const CLSobel3x3 &) = delete;
- /** Default destructor */
- ~CLSobel3x3();
- /** Initialise the function's source, destinations and border mode.
- *
- * @note At least one of output_x or output_y must be not NULL.
- *
- * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output_x (optional) Destination for the Sobel 3x3 convolution along the X axis. Data types supported: S16.
- * @param[out] output_y (optional) Destination for the Sobel 3x3 convolution along the Y axis. Data types supported: S16.
- * @param[in] border_mode Border mode to use for the convolution.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
- /** Initialise the function's source, destinations and border mode.
- *
- * @note At least one of output_x or output_y must be not NULL.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output_x (optional) Destination for the Sobel 3x3 convolution along the X axis. Data types supported: S16.
- * @param[out] output_y (optional) Destination for the Sobel 3x3 convolution along the Y axis. Data types supported: S16.
- * @param[in] border_mode Border mode to use for the convolution.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
-};
-}
-#endif /*ARM_COMPUTE_CLSOBEL3X3_H */
diff --git a/arm_compute/runtime/CL/functions/CLSobel5x5.h b/arm_compute/runtime/CL/functions/CLSobel5x5.h
deleted file mode 100644
index e791d8a9e7..0000000000
--- a/arm_compute/runtime/CL/functions/CLSobel5x5.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLSOBEL5X5_H
-#define ARM_COMPUTE_CLSOBEL5X5_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-
-#include <cstdint>
-#include <memory>
-
-namespace arm_compute
-{
-class CLCompileContext;
-class CLFillBorderKernel;
-class CLSobel5x5HorKernel;
-class CLSobel5x5VertKernel;
-class ICLTensor;
-
-/** Basic function to execute sobel 5x5 filter. This function calls the following OpenCL kernels:
- *
- * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref CLSobel5x5HorKernel
- * -# @ref CLSobel5x5VertKernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class CLSobel5x5 : public IFunction
-{
-public:
- /** Default Constructor.
- *
- * @param[in] memory_manager (Optional) Memory manager.
- */
- CLSobel5x5(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Prevent instances of this class from being copied */
- CLSobel5x5(const CLSobel5x5 &) = delete;
- /** Prevent instances of this class from being copied */
- CLSobel5x5 &operator=(const CLSobel5x5 &) = delete;
- /** Default destructor */
- ~CLSobel5x5();
- /** Initialise the function's source, destinations and border mode.
- *
- * @note At least one of output_x or output_y must be not NULL.
- *
- * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output_x (optional) Destination for the Sobel 5x5 convolution along the X axis. Data types supported: S16.
- * @param[out] output_y (optional) Destination for the Sobel 5x5 convolution along the Y axis. Data types supported: S16.
- * @param[in] border_mode Border mode to use for the convolution.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
- /** Initialise the function's source, destinations and border mode.
- *
- * @note At least one of output_x or output_y must be not NULL.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output_x (optional) Destination for the Sobel 5x5 convolution along the X axis. Data types supported: S16.
- * @param[out] output_y (optional) Destination for the Sobel 5x5 convolution along the Y axis. Data types supported: S16.
- * @param[in] border_mode Border mode to use for the convolution.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
-
- // Inherited methods overridden:
- void run() override;
-
-protected:
- MemoryGroup _memory_group; /**< Function's memory group */
- std::unique_ptr<CLSobel5x5HorKernel> _sobel_hor; /**< Sobel Horizontal 5x5 kernel */
- std::unique_ptr<CLSobel5x5VertKernel> _sobel_vert; /**< Sobel Vertical 5x5 kernel */
- std::unique_ptr<CLFillBorderKernel> _border_handler; /**< Kernel to handle image borders */
- CLImage _tmp_x; /**< Temporary buffer for Sobel X */
- CLImage _tmp_y; /**< Temporary buffer for Sobel Y */
-};
-}
-#endif /*ARM_COMPUTE_CLSOBEL5X5_H */
diff --git a/arm_compute/runtime/CL/functions/CLSobel7x7.h b/arm_compute/runtime/CL/functions/CLSobel7x7.h
deleted file mode 100644
index 65e8de55b4..0000000000
--- a/arm_compute/runtime/CL/functions/CLSobel7x7.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLSOBEL7X7_H
-#define ARM_COMPUTE_CLSOBEL7X7_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-
-#include <cstdint>
-#include <memory>
-
-namespace arm_compute
-{
-class CLCompileContext;
-class CLFillBorderKernel;
-class CLSobel7x7HorKernel;
-class CLSobel7x7VertKernel;
-class ICLTensor;
-
-/** Basic function to execute sobel 7x7 filter. This function calls the following OpenCL kernels:
- *
- * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref CLSobel7x7HorKernel
- * -# @ref CLSobel7x7VertKernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class CLSobel7x7 : public IFunction
-{
-public:
- /** Default Constructor.
- *
- * @param[in] memory_manager (Optional) Memory manager.
- */
- CLSobel7x7(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Prevent instances of this class from being copied */
- CLSobel7x7(const CLSobel7x7 &) = delete;
- /** Prevent instances of this class from being copied */
- CLSobel7x7 &operator=(const CLSobel7x7 &) = delete;
- /** Default destructor */
- ~CLSobel7x7();
- /** Initialise the function's source, destinations and border mode.
- *
- * @note At least one of output_x or output_y must be not NULL.
- *
- * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output_x (optional) Destination for the Sobel 7x7 convolution along the X axis. Data types supported: S32.
- * @param[out] output_y (optional) Destination for the Sobel 7x7 convolution along the Y axis. Data types supported: S32.
- * @param[in] border_mode Border mode to use for the convolution.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
- /** Initialise the function's source, destinations and border mode.
- *
- * @note At least one of output_x or output_y must be not NULL.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output_x (optional) Destination for the Sobel 7x7 convolution along the X axis. Data types supported: S32.
- * @param[out] output_y (optional) Destination for the Sobel 7x7 convolution along the Y axis. Data types supported: S32.
- * @param[in] border_mode Border mode to use for the convolution.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
-
- // Inherited methods overridden:
- void run() override;
-
-protected:
- MemoryGroup _memory_group; /**< Function's memory group */
- std::unique_ptr<CLSobel7x7HorKernel> _sobel_hor; /**< Sobel Horizontal 7x7 kernel */
- std::unique_ptr<CLSobel7x7VertKernel> _sobel_vert; /**< Sobel Vertical 7x7 kernel */
- std::unique_ptr<CLFillBorderKernel> _border_handler; /**< Kernel to handle image borders */
- CLImage _tmp_x; /**< Temporary buffer for Sobel X */
- CLImage _tmp_y; /**< Temporary buffer for Sobel Y */
-};
-}
-#endif /*ARM_COMPUTE_CLSOBEL7X7_H */
diff --git a/arm_compute/runtime/CL/functions/CLTableLookup.h b/arm_compute/runtime/CL/functions/CLTableLookup.h
deleted file mode 100644
index ca59309548..0000000000
--- a/arm_compute/runtime/CL/functions/CLTableLookup.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLTABLELOOKUP_H
-#define ARM_COMPUTE_CLTABLELOOKUP_H
-
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class CLCompileContext;
-class ICLTensor;
-class ICLLut;
-
-/** Basic function to run @ref CLTableLookupKernel */
-class CLTableLookup : public ICLSimpleFunction
-{
-public:
- /** Initialise the kernel's inputs and output
- *
- * @param[in] input First tensor input. Data types supported: U8 and S16
- * @param[in] lut Input lookup table. Data types supported: U8 and S16
- * @param[out] output Output tensor. Data types supported: U8 and S16
- */
- void configure(const ICLTensor *input, const ICLLut *lut, ICLTensor *output);
- /** Initialise the kernel's inputs and output
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input First tensor input. Data types supported: U8 and S16
- * @param[in] lut Input lookup table. Data types supported: U8 and S16
- * @param[out] output Output tensor. Data types supported: U8 and S16
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLLut *lut, ICLTensor *output);
-};
-}
-#endif /*ARM_COMPUTE_CLTABLELOOKUP_H */
diff --git a/arm_compute/runtime/CL/functions/CLThreshold.h b/arm_compute/runtime/CL/functions/CLThreshold.h
deleted file mode 100644
index a6817483bb..0000000000
--- a/arm_compute/runtime/CL/functions/CLThreshold.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2016-2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLTHRESHOLD_H
-#define ARM_COMPUTE_CLTHRESHOLD_H
-
-#include "arm_compute/core/KernelDescriptors.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-// Forward declarations
-class CLCompileContext;
-class ICLTensor;
-
-/** Basic function to run @ref CLThresholdKernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
-*/
-class CLThreshold : public ICLSimpleFunction
-{
-public:
- /** Initialise the function's source, destination, thresholds and threshold type
- *
- * @param[in] input First tensor input. Data types supported: U8.
- * @param[out] output Output tensor. Data types supported: U8.
- * @param[in] info Threshold descriptor
- */
- void configure(const ICLTensor *input, ICLTensor *output, const ThresholdKernelInfo &info);
- /** Initialise the function's source, destination, thresholds and threshold type
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input First tensor input. Data types supported: U8.
- * @param[out] output Output tensor. Data types supported: U8.
- * @param[in] info Threshold descriptor
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ThresholdKernelInfo &info);
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLTHRESHOLD_H */
diff --git a/arm_compute/runtime/CL/functions/CLWarpAffine.h b/arm_compute/runtime/CL/functions/CLWarpAffine.h
deleted file mode 100644
index 2f73097fcf..0000000000
--- a/arm_compute/runtime/CL/functions/CLWarpAffine.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLWARPAFFINE_H
-#define ARM_COMPUTE_CLWARPAFFINE_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class CLCompileContext;
-class ICLTensor;
-
-/** Basic function to run @ref CLWarpAffineKernel for AFFINE transformation
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
-*/
-class CLWarpAffine : public ICLSimpleFunction
-{
-public:
- /** Initialize the function's source, destination, interpolation policy and border_mode.
- *
- * @param[in,out] input Source temspr. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination tensor, Data types supported: U8.
- * @param[in] matrix The affine matrix. Must be 2x3 of type float.
- * The matrix argument requires 9 values, the last 3 values are ignored.
- * @param[in] policy The interpolation type.
- * @param[in] border_mode Strategy to use for borders.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0);
- /** Initialize the function's source, destination, interpolation policy and border_mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in,out] input Source temspr. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination tensor, Data types supported: U8.
- * @param[in] matrix The affine matrix. Must be 2x3 of type float.
- * The matrix argument requires 9 values, the last 3 values are ignored.
- * @param[in] policy The interpolation type.
- * @param[in] border_mode Strategy to use for borders.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy, BorderMode border_mode,
- uint8_t constant_border_value = 0);
-};
-}
-#endif /*ARM_COMPUTE_CLWARPAFFINE_H */
diff --git a/arm_compute/runtime/CL/functions/CLWarpPerspective.h b/arm_compute/runtime/CL/functions/CLWarpPerspective.h
deleted file mode 100644
index 4e2c81e71c..0000000000
--- a/arm_compute/runtime/CL/functions/CLWarpPerspective.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLWARPPERSPECTIVE_H
-#define ARM_COMPUTE_CLWARPPERSPECTIVE_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class CLCompileContext;
-class ICLTensor;
-
-/** Basic function to run @ref CLWarpPerspectiveKernel for PERSPECTIVE transformation
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
-*/
-class CLWarpPerspective : public ICLSimpleFunction
-{
-public:
- /** Initialize the function's source, destination, interpolation policy and border_mode.
- *
- * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination tensor. Data types supported: U8.
- * @param[in] matrix The perspective matrix. Must be 3x3 of type float.
- * @param[in] policy The interpolation type.
- * @param[in] border_mode Strategy to use for borders.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0);
- /** Initialize the function's source, destination, interpolation policy and border_mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination tensor. Data types supported: U8.
- * @param[in] matrix The perspective matrix. Must be 3x3 of type float.
- * @param[in] policy The interpolation type.
- * @param[in] border_mode Strategy to use for borders.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy, BorderMode border_mode,
- uint8_t constant_border_value = 0);
-};
-}
-#endif /*ARM_COMPUTE_CLWARPPERSPECTIVE_H */
diff --git a/arm_compute/runtime/NEON/NEFunctions.h b/arm_compute/runtime/NEON/NEFunctions.h
index 8b6649cc41..863a8a6412 100644
--- a/arm_compute/runtime/NEON/NEFunctions.h
+++ b/arm_compute/runtime/NEON/NEFunctions.h
@@ -39,7 +39,6 @@
#include "arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h"
#include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h"
#include "arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h"
-#include "arm_compute/runtime/NEON/functions/NEConvolution.h"
#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"
#include "arm_compute/runtime/NEON/functions/NECopy.h"
#include "arm_compute/runtime/NEON/functions/NECropResize.h"
@@ -75,7 +74,6 @@
#include "arm_compute/runtime/NEON/functions/NELogical.h"
#include "arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h"
#include "arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h"
-#include "arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h"
#include "arm_compute/runtime/NEON/functions/NENormalizationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEPReluLayer.h"
#include "arm_compute/runtime/NEON/functions/NEPadLayer.h"
@@ -91,6 +89,7 @@
#include "arm_compute/runtime/NEON/functions/NERange.h"
#include "arm_compute/runtime/NEON/functions/NEReduceMean.h"
#include "arm_compute/runtime/NEON/functions/NEReductionOperation.h"
+#include "arm_compute/runtime/NEON/functions/NERemap.h"
#include "arm_compute/runtime/NEON/functions/NEReorgLayer.h"
#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
#include "arm_compute/runtime/NEON/functions/NEReverse.h"
diff --git a/arm_compute/runtime/NEON/functions/NEConvolution.h b/arm_compute/runtime/NEON/functions/NEConvolution.h
deleted file mode 100644
index f2d7ae8090..0000000000
--- a/arm_compute/runtime/NEON/functions/NEConvolution.h
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
- * Copyright (c) 2016-2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NECONVOLUTION_H
-#define ARM_COMPUTE_NECONVOLUTION_H
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
-#include "arm_compute/runtime/Tensor.h"
-
-#include <cstdint>
-#include <memory>
-
-namespace arm_compute
-{
-class ITensor;
-class NEFillBorderKernel;
-template <unsigned int matrix_size>
-class NEConvolutionKernel;
-template <unsigned int matrix_size>
-class NESeparableConvolutionHorKernel;
-template <unsigned int matrix_size>
-class NESeparableConvolutionVertKernel;
-
-/** Basic function to execute convolution of size 3x3. This function calls the following Neon kernels:
- *
- * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref NEConvolution3x3Kernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class NEConvolution3x3 : public INESimpleFunction
-{
-public:
- /** Constructor */
- NEConvolution3x3() = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEConvolution3x3(const NEConvolution3x3 &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEConvolution3x3 &operator=(const NEConvolution3x3 &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEConvolution3x3(NEConvolution3x3 &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEConvolution3x3 &operator=(NEConvolution3x3 &&) = delete;
- /** Default destructor */
- ~NEConvolution3x3();
- /** Initialize the function's source, destination, conv and border_mode.
- *
- * @param[in,out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination tensor, Data types supported: U8/S16.
- * @param[in] conv Matrix_size x matrix_size S16 coefficients structured as a row-major 2D array in a linear buffer.
- * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
- * @param[in] border_mode Strategy to use for borders.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0);
-};
-
-/** Basic function to execute convolution of size 5x5, 7x7, 9x9. This function calls the following Neon kernels:
- *
- * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref NEConvolutionKernel or<br/>
- * @ref NESeparableConvolutionHorKernel and @ref NESeparableConvolutionVertKernel (if convolution matrix is separable)
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-template <unsigned int matrix_size>
-class NEConvolutionSquare : public IFunction
-{
-public:
- /** Default constructor */
- NEConvolutionSquare(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEConvolutionSquare(const NEConvolutionSquare &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEConvolutionSquare &operator=(const NEConvolutionSquare &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEConvolutionSquare(NEConvolutionSquare &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEConvolutionSquare &operator=(NEConvolutionSquare &&) = delete;
- /** Default destructor */
- ~NEConvolutionSquare();
- /** Initialize the function's source, destination, conv and border_mode.
- *
- * @param[in,out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination tensor, Data types supported: U8 or S16.
- * @param[in] conv matrix_size x matrix_size S16 coefficients structured as a row-major 2D array in a linear buffer.
- * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
- * @param[in] border_mode Strategy to use for borders.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- MemoryGroup _memory_group; /**< Function memory group */
- Tensor _tmp; /**< temporary buffer for output of horizontal pass */
- bool _is_separable; /**< true if the convolution can be separated */
- std::unique_ptr<NESeparableConvolutionHorKernel<matrix_size>> _kernel_hor; /**< kernel for horizontal pass of separated convolution */
- std::unique_ptr<NESeparableConvolutionVertKernel<matrix_size>> _kernel_vert; /**< kernel for vertical pass of separated convolution */
- std::unique_ptr<NEConvolutionKernel<matrix_size>> _kernel; /**< kernel for non-separated convolution **/
- std::unique_ptr<NEFillBorderKernel> _border_handler; /**< kernel for border handling */
-};
-
-/** Basic function to run 5x5 convolution. */
-using NEConvolution5x5 = NEConvolutionSquare<5>;
-/** Basic function to run 7x7 convolution. */
-using NEConvolution7x7 = NEConvolutionSquare<7>;
-/** Basic function to run 9x9 convolution. */
-using NEConvolution9x9 = NEConvolutionSquare<9>;
-
-/** Basic function to execute non-square convolution. This function calls the following Neon kernels:
- *
- * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref NEConvolutionRectangleKernel or<br/>
- *
- * @note Convolution rectangle should have dimensions of 3, 5, 7, 9
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
- */
-class NEConvolutionRectangle : public INESimpleFunction
-{
-public:
- /** Constructor */
- NEConvolutionRectangle() = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEConvolutionRectangle(const NEConvolutionRectangle &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEConvolutionRectangle &operator=(const NEConvolutionRectangle &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEConvolutionRectangle(NEConvolutionRectangle &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEConvolutionRectangle &operator=(NEConvolutionRectangle &&) = delete;
- /** Default destructor */
- ~NEConvolutionRectangle();
- /** Initialize the function's source, destination, conv and border_mode.
- *
- * @param[in,out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination tensor, Data types supported: U8 or S16.
- * @param[in] conv Matrix_size x matrix_size S16 coefficients structured as a row-major 2D array in a linear buffer.
- * @param[in] rows Rows of convolution kernel.
- * @param[in] cols Columns of convolution kernel.
- * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
- * @param[in] border_mode Strategy to use for borders.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t rows, uint32_t cols, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0);
-};
-}
-#endif /*ARM_COMPUTE_NECONVOLUTION_H */
diff --git a/arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h b/arm_compute/runtime/NEON/functions/NERemap.h
index 2fff72d2ed..84d0f2ee92 100644
--- a/arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h
+++ b/arm_compute/runtime/NEON/functions/NERemap.h
@@ -21,38 +21,43 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_NENONMAXIMASUPPRESSION3X3_H
-#define ARM_COMPUTE_NENONMAXIMASUPPRESSION3X3_H
+#ifndef ARM_COMPUTE_NEREMAP_H
+#define ARM_COMPUTE_NEREMAP_H
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/Tensor.h"
+
+#include <cstdint>
namespace arm_compute
{
class ITensor;
-/** Basic function to execute non-maxima suppression over a 3x3 window. This function calls the following Neon kernels:
+/** Basic function to execute remap. This function calls the following Neon kernels:
*
* -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
- * -# @ref NENonMaximaSuppression3x3Kernel
+ * -# @ref NERemapKernel
*
* @deprecated This function is deprecated and is intended to be removed in 21.05 release
*
*/
-class NENonMaximaSuppression3x3 : public INESimpleFunction
+class NERemap : public INESimpleFunction
{
public:
- /** Initialise the function's source, destinations and border mode.
- *
- * @note The implementation supports just 2 border modes: UNDEFINED and CONSTANT
- * The constant values used with CONSTANT border mode is 0
+ /** Initialise the function's sources, destination, interpolation policy and border mode.
*
- * @param[in, out] input Source tensor. Data type supported: U8/F32. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination for the Non-Maxima suppressions 3x3. Data type supported: same as @p input
- * @param[in] border_mode Border mode to use for non-maxima suppression. The implementation supports just 2 border modes: UNDEFINED and CONSTANT
+ * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED)
+ * @param[in] map_x Map for X coordinates. Data type supported: F32.
+ * @param[in] map_y Map for Y coordinates. Data type supported: F32.
+ * @param[out] output Output tensor. Data type supported: U8.
+ * @param[in] policy Interpolation policy to use. Only NEAREST and BILINEAR are supported.
+ * @param[in] border_mode Border mode to use on the input tensor.
+ * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
*
*/
- void configure(ITensor *input, ITensor *output, BorderMode border_mode);
+ void configure(ITensor *input, const ITensor *map_x, const ITensor *map_y, ITensor *output,
+ InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0);
};
}
-#endif /* ARM_COMPUTE_NENONMAXIMASUPPRESSION3X3_H */
+#endif /*ARM_COMPUTE_NEREMAP_H */
diff --git a/docs/00_introduction.dox b/docs/00_introduction.dox
index 7659d56e25..8616cb6d13 100644
--- a/docs/00_introduction.dox
+++ b/docs/00_introduction.dox
@@ -27,7 +27,7 @@ namespace arm_compute
@tableofcontents
-The Computer Vision and Machine Learning library is a set of functions optimised for both Arm CPUs and GPUs using SIMD technologies.
+The Compute Library is a collection of low-level machine learning functions optimized for both Arm CPUs and GPUs using SIMD technologies.
Several builds of the library are available using various configurations:
- OS: Linux, Android, macOS or bare metal.
@@ -95,6 +95,7 @@ v21.05 Public major release
- NEChannelCombine
- NEChannelExtract
- NEColorConvert
+ - NEConvolution
- NEDerivative
- NEDilate
- NEEqualizeHistogram
@@ -119,7 +120,6 @@ v21.05 Public major release
- NENonLinearFilter
- NEOpticalFlow
- NEPhase
- - NERemap
- NEScharr3x3
- NESobel3x3
- NESobel5x5
@@ -127,9 +127,52 @@ v21.05 Public major release
- NETableLookup
- NEThreshold
- NEWarpAffine
- - NEWarpPerspective
- - Remove all GLES kernels / functions / tests / examples
+ - NEWarpPerspectiveKernel
+ - Remove all GLES kernels / functions / tests / examples
+ - Removed computer vision support from CL backend
+ - Removed the following functions:
+ - CLAbsoluteDifference
+ - CLAccumulate
+ - CLBox3x3
+ - CLCannyEdge
+ - CLChannelCombine
+ - CLChannelExtract
+ - CLColorConvert
+ - CLConvolution
+ - CLDerivative
+ - CLDilate
+ - CLEqualizeHistogram
+ - CLErode
+ - CLFastCorners
+ - CLGaussian3x3
+ - CLGaussian5x5
+ - CLGaussianPyramid
+ - CLHOGDescriptor
+ - CLHOGDetector
+ - CLHOGGradient
+ - CLHOGMultiDetection
+ - CLHarrisCorners
+ - CLHistogram
+ - CLIntegralImage
+ - CLLaplacianPyramid
+ - CLLaplacianReconstruct
+ - CLMagnitude
+ - CLMeanStdDev
+ - CLMedian3x3
+ - CLMinMaxLocation
+ - CLNonLinearFilter
+ - CLOpticalFlow
+ - CLPhase
+ - CLScharr3x3
+ - CLSobel3x3
+ - CLSobel5x5
+ - CLSobel7x7
+ - CLTableLookup
+ - CLThreshold
+ - CLWarpAffine
+ - CLWarpPerspective
+
v21.02 Public major release
- Various bug fixes.
- Various optimisations.
@@ -212,8 +255,8 @@ v20.11 Public major release
- @ref NELogicalOr
- Removed padding from Neon kernels:
- @ref NEComplexPixelWiseMultiplicationKernel
- - @ref NENonMaximaSuppression3x3Kernel
- - NERemapKernel
+ - NENonMaximaSuppression3x3Kernel
+ - @ref NERemapKernel
- @ref NEGEMMInterleave4x4Kernel
- @ref NEDirectConvolutionLayerKernel
- @ref NEScaleKernel
@@ -221,7 +264,7 @@ v20.11 Public major release
- @ref NEGEMMLowpOffsetContributionKernel
- @ref NEGEMMTranspose1xWKernel
- NEPoolingLayerKernel
- - @ref NEConvolutionKernel
+ - NEConvolutionKernel
- @ref NEDepthwiseConvolutionLayerNativeKernel
- @ref NEGEMMLowpMatrixMultiplyKernel
- @ref NEGEMMMatrixMultiplyKernel
@@ -534,7 +577,7 @@ v20.08 Public major release
- NEGEMMLowpQuantizeDownInt32ToUint8Scale
- NEGEMMMatrixAccumulateBiasesKernel
- Deprecated functions / interfaces:
- - Non-descriptor based interfaces for NEThreshold, @ref CLThreshold
+ - Non-descriptor based interfaces for NEThreshold, CLThreshold
- Non-descriptor based interfaces for @ref NEScale, @ref CLScale and GCScale
- In @ref NESoftmaxLayer, @ref NELogSoftmaxLayer, @ref CLSoftmaxLayer, @ref CLLogSoftmaxLayer and GCSoftmaxLayer :
The default "axis" value for @ref CLSoftmaxLayer, @ref CLLogSoftmaxLayer and GCSoftmaxLayer is changed from 1 to 0.
@@ -791,7 +834,7 @@ v19.08 Public major release
- @ref CLGEMMLowpMatrixMultiplyNativeKernel
- CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel
- @ref CLGEMMMatrixMultiplyNativeKernel
- - @ref CLMeanStdDevNormalizationKernel / @ref CLMeanStdDevNormalizationLayer
+ - CLMeanStdDevNormalizationKernel / CLMeanStdDevNormalizationLayer
- @ref CLSpaceToDepthLayerKernel / @ref CLSpaceToDepthLayer
- New examples:
- neon_opticalflow
@@ -1286,7 +1329,7 @@ v17.06 Public major release
- New OpenCL kernels / functions:
- @ref CLBatchNormalizationLayerKernel / @ref CLBatchNormalizationLayer
- CLDepthConcatenateLayerKernel / CLDepthConcatenateLayer
- - @ref CLHOGOrientationBinningKernel @ref CLHOGBlockNormalizationKernel, @ref CLHOGDetectorKernel / @ref CLHOGDescriptor @ref CLHOGDetector @ref CLHOGGradient @ref CLHOGMultiDetection
+ - CLHOGOrientationBinningKernel, CLHOGBlockNormalizationKernel, CLHOGDetectorKernel / CLHOGDescriptor, CLHOGDetector, CLHOGGradient, CLHOGMultiDetection
- CLLocallyConnectedMatrixMultiplyKernel / CLLocallyConnectedLayer
- @ref CLWeightsReshapeKernel / @ref CLConvolutionLayerReshapeWeights
- New C++ kernels:
@@ -1308,12 +1351,12 @@ v17.05 Public bug fixes release
v17.04 Public bug fixes release
The following functions have been ported to use the new accurate padding:
- - @ref CLColorConvertKernel
- - @ref CLEdgeNonMaxSuppressionKernel
- - @ref CLEdgeTraceKernel
- - @ref CLGaussianPyramidHorKernel
- - @ref CLGaussianPyramidVertKernel
- - @ref CLGradientKernel
+ - CLColorConvertKernel
+ - CLEdgeNonMaxSuppressionKernel
+ - CLEdgeTraceKernel
+ - CLGaussianPyramidHorKernel
+ - CLGaussianPyramidVertKernel
+ - CLGradientKernel
- NEChannelCombineKernel
- @ref NEFillArrayKernel
- NEGaussianPyramidHorKernel
@@ -1324,8 +1367,8 @@ v17.04 Public bug fixes release
- NELogits1DMaxKernel
- NELogits1DShiftExpSumKernel
- NELogits1DNormKernel
- - @ref NENonMaximaSuppression3x3FP16Kernel
- - @ref NENonMaximaSuppression3x3Kernel
+ - NENonMaximaSuppression3x3FP16Kernel
+ - NENonMaximaSuppression3x3Kernel
v17.03.1 First Major public release of the sources
- Renamed the library to arm_compute
@@ -1343,13 +1386,13 @@ v17.03.1 First Major public release of the sources
v17.03 Sources preview
- New OpenCL kernels / functions:
- - @ref CLGradientKernel, @ref CLEdgeNonMaxSuppressionKernel, @ref CLEdgeTraceKernel / @ref CLCannyEdge
+ - CLGradientKernel, CLEdgeNonMaxSuppressionKernel, CLEdgeTraceKernel / CLCannyEdge
- GEMM refactoring + FP16 support: CLGEMMInterleave4x4Kernel, CLGEMMTranspose1xWKernel, @ref CLGEMMMatrixMultiplyKernel, CLGEMMMatrixAdditionKernel / @ref CLGEMM
- CLGEMMMatrixAccumulateBiasesKernel / @ref CLFullyConnectedLayer
- @ref CLTransposeKernel / @ref CLTranspose
- - @ref CLLKTrackerInitKernel, @ref CLLKTrackerStage0Kernel, @ref CLLKTrackerStage1Kernel, @ref CLLKTrackerFinalizeKernel / @ref CLOpticalFlow
+ - @ref CLLKTrackerInitKernel, @ref CLLKTrackerStage0Kernel, @ref CLLKTrackerStage1Kernel, @ref CLLKTrackerFinalizeKernel / CLOpticalFlow
- @ref CLNormalizationLayerKernel / @ref CLNormalizationLayer
- - @ref CLLaplacianPyramid, @ref CLLaplacianReconstruct
+ - CLLaplacianPyramid, CLLaplacianReconstruct
- New Neon kernels / functions:
- NEActivationLayerKernel / @ref NEActivationLayer
- GEMM refactoring + FP16 support (Requires armv8.2 CPU): @ref NEGEMMInterleave4x4Kernel, @ref NEGEMMTranspose1xWKernel, @ref NEGEMMMatrixMultiplyKernel, @ref NEGEMMMatrixAdditionKernel / @ref NEGEMM
@@ -1359,23 +1402,23 @@ v17.02.1 Sources preview
- New OpenCL kernels / functions:
- CLLogits1DMaxKernel, CLLogits1DShiftExpSumKernel, @ref CLLogits1DNormKernel / @ref CLSoftmaxLayer
- CLPoolingLayerKernel / @ref CLPoolingLayer
- - @ref CLIm2ColKernel, @ref CLCol2ImKernel, CLConvolutionLayerWeightsReshapeKernel / @ref CLConvolutionLayer
+ - @ref CLIm2ColKernel, @ref CLCol2ImKernel, CLConvolutionLayerWeightsReshapeKernel / CLConvolutionLayer
- @ref CLRemapKernel / @ref CLRemap
- - @ref CLGaussianPyramidHorKernel, @ref CLGaussianPyramidVertKernel / @ref CLGaussianPyramid, @ref CLGaussianPyramidHalf, @ref CLGaussianPyramidOrb
- - @ref CLMinMaxKernel, @ref CLMinMaxLocationKernel / @ref CLMinMaxLocation
- - @ref CLNonLinearFilterKernel / @ref CLNonLinearFilter
+ - CLGaussianPyramidHorKernel, CLGaussianPyramidVertKernel / CLGaussianPyramid, CLGaussianPyramidHalf, CLGaussianPyramidOrb
+ - CLMinMaxKernel, CLMinMaxLocationKernel / CLMinMaxLocation
+ - CLNonLinearFilterKernel / CLNonLinearFilter
- New Neon FP16 kernels (Requires armv8.2 CPU)
- NEAccumulateWeightedFP16Kernel
- NEBox3x3FP16Kernel
- - @ref NENonMaximaSuppression3x3FP16Kernel
+ - NENonMaximaSuppression3x3FP16Kernel
v17.02 Sources preview
- New OpenCL kernels / functions:
- CLActivationLayerKernel / @ref CLActivationLayer
- - @ref CLChannelCombineKernel / @ref CLChannelCombine
- - @ref CLDerivativeKernel / @ref CLChannelExtract
- - @ref CLFastCornersKernel / @ref CLFastCorners
- - @ref CLMeanStdDevKernel / @ref CLMeanStdDev
+ - CLChannelCombineKernel / CLChannelCombine
+ - CLDerivativeKernel / CLChannelExtract
+ - CLFastCornersKernel / CLFastCorners
+ - CLMeanStdDevKernel / CLMeanStdDev
- New Neon kernels / functions:
- HOG / SVM: NEHOGOrientationBinningKernel, NEHOGBlockNormalizationKernel, NEHOGDetectorKernel, NEHOGNonMaximaSuppressionKernel / NEHOGDescriptor, NEHOGDetector, NEHOGGradient, NEHOGMultiDetection
- NENonLinearFilterKernel / NENonLinearFilter
@@ -1893,8 +1936,6 @@ Compute Library requires OpenCL 1.1 and above with support of non uniform workgr
Enabling 16-bit floating point calculations require \a cl_khr_fp16 extension to be supported. All Mali GPUs with compute capabilities have native support for half precision floating points.
-Use of @ref CLMeanStdDev function requires 64-bit atomics support, thus \a cl_khr_int64_base_atomics should be supported in order to use.
-
@subsubsection S3_7_2_cl_performance_requirements Performance improvements
Integer dot product built-in function extensions (and therefore optimized kernels) are available with Mali OpenCL DDK r22p0 and above for the following GPUs : G71, G76. The relevant extensions are \a cl_arm_integer_dot_product_int8, \a cl_arm_integer_dot_product_accumulate_int8 and \a cl_arm_integer_dot_product_accumulate_int16.
diff --git a/docs/01_library.dox b/docs/01_library.dox
index 641fc3e11b..5cd33b67a6 100644
--- a/docs/01_library.dox
+++ b/docs/01_library.dox
@@ -191,7 +191,7 @@ This is a very basic implementation which was originally used in the Neon runtim
Functions will automatically allocate the temporary buffers mentioned above, and will automatically multi-thread kernels' executions using the very basic scheduler described in the previous section.
-Simple functions only call a single kernel (e.g @ref NEConvolution3x3), while more complex ones consist of several kernels pipelined together (e.g @ref NEFullyConnectedLayer ). Check their documentation to find out which kernels are used by each function.
+Simple functions only call a single kernel (e.g. NEConvolution3x3), while more complex ones consist of several kernels pipelined together (e.g. @ref NEFullyConnectedLayer ). Check their documentation to find out which kernels are used by each function.
@code{.cpp}
//Create a function object:
@@ -225,9 +225,6 @@ If the library is compiled with embed_kernels=0 the application can set the path
In order to block until all the jobs in the CLScheduler's command queue are done executing the user can call @ref CLScheduler::sync() or create a sync event using @ref CLScheduler::enqueue_sync_event()
-For example:
-@snippet cl_events.cpp OpenCL events
-
@subsection S4_4_2_cl_neon OpenCL / Neon interoperability
You can mix OpenCL and Neon kernels and functions. However it is the user's responsibility to handle the mapping/unmapping of OpenCL objects.
@@ -260,8 +257,6 @@ There are different ways padding can be calculated:
- Accurate padding:
-@snippet neon_convolution.cpp Accurate padding
-
@note It's important to call allocate @b after the function is configured: if the image / tensor is already allocated then the function will shrink its execution window instead of increasing the padding. (See below for more details).
- Manual padding / no padding / auto padding: You can allocate your images / tensors up front (before configuring your functions). In that case the function will use whatever padding is available and will shrink its execution window if there isn't enough padding available (which translates into a smaller valid region for the output). See also @ref valid_region.
diff --git a/docs/06_functions_list.dox b/docs/06_functions_list.dox
index 1f4794f131..0c5145cdc8 100644
--- a/docs/06_functions_list.dox
+++ b/docs/06_functions_list.dox
@@ -38,8 +38,6 @@ namespace arm_compute
- @ref NEBoundingBoxTransform
- @ref NECast
- @ref NEComplexPixelWiseMultiplication
- - @ref NEConvolution3x3
- - @ref NEConvolutionRectangle
- @ref NEElementwiseComparison
- @ref NEElementwiseComparisonStatic
- @ref NEElementwiseDivision
@@ -50,7 +48,6 @@ namespace arm_compute
- @ref NELogicalAnd
- @ref NELogicalNot
- @ref NELogicalOr
- - @ref NENonMaximaSuppression3x3
- @ref NEPixelWiseMultiplication
- @ref NEPReluLayer
- @ref NEROIAlignLayer
@@ -90,7 +87,6 @@ namespace arm_compute
- @ref NEConvertFullyConnectedWeights
- @ref NEConvolutionLayer
- @ref NEConvolutionLayerReshapeWeights
- - @ref NEConvolutionSquare &lt;matrix_size&gt;
- @ref NECropResize
- @ref NEDeconvolutionLayer
- @ref NEDepthwiseConvolutionAssemblyDispatch
@@ -140,12 +136,10 @@ namespace arm_compute
- @ref IFunction
- @ref CLBatchNormalizationLayer
- @ref CLBatchToSpaceLayer
- - @ref CLCannyEdge
- @ref CLComplexPixelWiseMultiplication
- @ref CLConcatenateLayer
- @ref CLConvolutionLayer
- @ref CLConvolutionLayerReshapeWeights
- - @ref CLConvolutionSquare &lt;matrix_size&gt;
- @ref CLCropResize
- @ref CLDeconvolutionLayer
- @ref CLDeconvolutionLayerUpsample
@@ -154,32 +148,17 @@ namespace arm_compute
- @ref CLDequantizationLayer
- @ref CLDirectConvolutionLayer
- @ref CLDirectDeconvolutionLayer
- - @ref CLEqualizeHistogram
- - @ref CLFastCorners
- @ref CLFFT1D
- @ref CLFFT2D
- @ref CLFFTConvolutionLayer
- @ref CLFullyConnectedLayer
- @ref CLFuseBatchNormalization
- - @ref CLGaussian5x5
- - @ref CLGaussianPyramid
- - @ref CLGaussianPyramidHalf
- - @ref CLGaussianPyramidOrb
- @ref CLGEMM
- @ref CLGEMMConvolutionLayer
- @ref CLGEMMDeconvolutionLayer
- @ref CLGEMMLowpMatrixMultiplyCore
- @ref CLGenerateProposalsLayer
- - @ref CLHarrisCorners
- - @ref CLHistogram
- - @ref CLHOGDescriptor
- - @ref CLHOGDetector
- - @ref CLHOGGradient
- - @ref CLHOGMultiDetection
- - @ref CLIntegralImage
- @ref CLL2NormalizeLayer
- - @ref CLLaplacianPyramid
- - @ref CLLaplacianReconstruct
- @ref CLLogicalAnd
- @ref CLLogicalNot
- @ref CLLogicalOr
@@ -187,18 +166,13 @@ namespace arm_compute
- @ref CLLSTMLayerQuantized
- @ref CLQLSTMLayer
- @ref CLMaxUnpoolingLayer
- - @ref CLMeanStdDev
- - @ref CLMinMaxLocation
- @ref CLNormalizationLayer
- @ref CLNormalizePlanarYUVLayer
- - @ref CLOpticalFlow
- @ref CLPadLayer
- @ref CLQuantizationLayer
- @ref CLReduceMean
- @ref CLReductionOperation
- @ref CLRNNLayer
- - @ref CLSobel5x5
- - @ref CLSobel7x7
- @ref CLSoftmaxLayerGeneric &lt;IS_LOG&gt;
- @ref CLSpaceToBatchLayer
- @ref CLSpaceToDepthLayer
@@ -207,10 +181,6 @@ namespace arm_compute
- @ref CLUnstack
- @ref CLWinogradConvolutionLayer
- @ref ICLSimpleFunction
- - @ref CLAbsoluteDifference
- - @ref CLAccumulate
- - @ref CLAccumulateSquared
- - @ref CLAccumulateWeighted
- @ref CLActivationLayer
- @ref CLArgMinMaxLayer
- @ref CLArithmeticAddition
@@ -221,25 +191,16 @@ namespace arm_compute
- @ref CLBitwiseOr
- @ref CLBitwiseXor
- @ref CLBoundingBoxTransform
- - @ref CLBox3x3
- @ref CLCast
- - @ref CLChannelCombine
- - @ref CLChannelExtract
- @ref CLChannelShuffleLayer
- - @ref CLColorConvert
- @ref CLComparison
- @ref CLComparisonStatic
- @ref CLConvertFullyConnectedWeights
- - @ref CLConvolution3x3
- - @ref CLConvolutionRectangle
- @ref CLCopy
- @ref CLDepthConvertLayer
- - @ref CLDerivative
- - @ref CLDilate
- @ref CLElementwiseMax
- @ref CLElementwiseMin
- @ref CLElementwiseSquaredDiff
- - @ref CLErode
- @ref CLExpLayer
- @ref CLFill
- @ref CLFillBorder
@@ -247,23 +208,16 @@ namespace arm_compute
- @ref CLFloor
- @ref CLFullyConnectedLayerReshapeWeights
- @ref CLGather
- - @ref CLGaussian3x3
- @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint
- @ref CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint
- @ref CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel
- - @ref CLMagnitude
- @ref CLMeanStdDevNormalizationLayer
- - @ref CLMedian3x3
- - @ref CLNonLinearFilter
- - @ref CLNonMaximaSuppression3x3
- @ref CLPermute
- - @ref CLPhase
- @ref CLPixelWiseMultiplication
- @ref CLPoolingLayer
- @ref CLPReluLayer
- @ref CLPriorBoxLayer
- @ref CLRange
- - @ref CLRemap
- @ref CLReorgLayer
- @ref CLReshapeLayer
- @ref CLReverse
@@ -271,17 +225,11 @@ namespace arm_compute
- @ref CLROIPoolingLayer
- @ref CLRsqrtLayer
- @ref CLScale
- - @ref CLScharr3x3
- @ref CLSelect
- @ref CLSlice
- - @ref CLSobel3x3
- @ref CLStridedSlice
- - @ref CLTableLookup
- - @ref CLThreshold
- @ref CLTile
- @ref CLTranspose
- - @ref CLWarpAffine
- - @ref CLWarpPerspective
- @ref CLWinogradInputTransform
@section S6_3 CPP functions
diff --git a/examples/cl_convolution.cpp b/examples/cl_convolution.cpp
deleted file mode 100644
index bfa53f3379..0000000000
--- a/examples/cl_convolution.cpp
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL /* Needed by Utils.cpp to handle OpenCL exceptions properly */
-#error "This example needs to be built with -DARM_COMPUTE_CL"
-#endif /* ARM_COMPUTE_CL */
-
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "arm_compute/runtime/CL/functions/CLConvolution.h"
-#include "utils/ImageLoader.h"
-#include "utils/Utils.h"
-
-using namespace arm_compute;
-using namespace utils;
-
-/** Gaussian 3x3 matrix
- */
-const std::array<int16_t, 9> gaussian3x3 =
-{
- 1, 2, 1,
- 2, 4, 2,
- 1, 2, 1
-};
-
-/** Gaussian 5x5 matrix
- */
-const std::array<int16_t, 25> gaussian5x5 =
-{
- 1, 4, 6, 4, 1,
- 4, 16, 24, 16, 4,
- 6, 24, 36, 24, 6,
- 4, 16, 24, 16, 4,
- 1, 4, 6, 4, 1
-};
-
-class CLConvolutionExample : public Example
-{
-public:
- bool do_setup(int argc, char **argv) override
- {
- PPMLoader ppm;
-
- CLScheduler::get().default_init();
-
- if(argc < 2)
- {
- // Print help
- std::cout << "Usage: ./build/cl_convolution [input_image.ppm]\n\n";
- std::cout << "No input_image provided, creating a dummy 640x480 image\n";
- // Create an empty grayscale 640x480 image
- src.allocator()->init(TensorInfo(640, 480, Format::U8));
- }
- else
- {
- ppm.open(argv[1]);
- ppm.init_image(src, Format::U8);
- }
-
- // Configure the temporary and destination images
- tmp.allocator()->init(*src.info());
- dst.allocator()->init(*src.info());
-
- // Apply a Gaussian 3x3 filter to the source image followed by a Gaussian 5x5:
- conv3x3.configure(&src, &tmp, gaussian3x3.data(), 0 /* Let arm_compute calculate the scale */, BorderMode::UNDEFINED);
- conv5x5.configure(&tmp, &dst, gaussian5x5.data(), 0 /* Let arm_compute calculate the scale */, BorderMode::UNDEFINED);
-
- // Allocate all the images
- src.allocator()->allocate();
- tmp.allocator()->allocate();
- dst.allocator()->allocate();
- // Fill the input image with the content of the PPM image if a filename was provided:
- if(ppm.is_open())
- {
- ppm.fill_image(src);
- output_filename = std::string(argv[1]) + "_out.ppm";
- }
-
- return true;
- }
- void do_run() override
- {
- // Execute the functions:
- conv3x3.run();
- conv5x5.run();
-
- // Make sure all the OpenCL jobs are done executing:
- CLScheduler::get().sync();
- }
- void do_teardown() override
- {
- // Save the result to file:
- if(!output_filename.empty())
- {
- save_to_ppm(dst, output_filename); // save_to_ppm maps and unmaps the image to store as PPM
- }
- }
-
-private:
- CLImage src{};
- CLImage tmp{};
- CLImage dst{};
- CLConvolution3x3 conv3x3{};
- CLConvolution5x5 conv5x5{};
- std::string output_filename{};
-};
-
-/** Main program for convolution test
- *
- * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Path to PPM image to process )
- */
-int main(int argc, char **argv)
-{
- return utils::run_example<CLConvolutionExample>(argc, argv);
-}
diff --git a/examples/cl_events.cpp b/examples/cl_events.cpp
deleted file mode 100644
index 27c063cbc9..0000000000
--- a/examples/cl_events.cpp
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL /* Needed by Utils.cpp to handle OpenCL exceptions properly */
-#error "This example needs to be built with -DARM_COMPUTE_CL"
-#endif /* ARM_COMPUTE_CL */
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "arm_compute/runtime/CL/functions/CLGaussian5x5.h"
-#include "arm_compute/runtime/CL/functions/CLMedian3x3.h"
-#include "arm_compute/runtime/CL/functions/CLScale.h"
-#include "utils/ImageLoader.h"
-#include "utils/Utils.h"
-
-using namespace arm_compute;
-using namespace utils;
-
-class CLEventsExample : public Example
-{
-public:
- bool do_setup(int argc, char **argv) override
- {
- /** [OpenCL events] **/
- PPMLoader ppm;
- constexpr int scale_factor = 2;
-
- CLScheduler::get().default_init();
-
- if(argc < 2)
- {
- // Print help
- std::cout << "Usage: ./build/cl_events [input_image.ppm]\n\n";
- std::cout << "No input_image provided, creating a dummy 640x480 image\n";
- // Create an empty grayscale 640x480 image
- src.allocator()->init(TensorInfo(640, 480, Format::U8));
- }
- else
- {
- ppm.open(argv[1]);
- ppm.init_image(src, Format::U8);
- }
-
- TensorInfo dst_info(src.info()->dimension(0) / scale_factor, src.info()->dimension(1) / scale_factor, Format::U8);
-
- // Configure the temporary and destination images
- dst.allocator()->init(dst_info);
- tmp_scale_median.allocator()->init(dst_info);
- tmp_median_gauss.allocator()->init(dst_info);
-
- //Configure the functions:
- scale.configure(&src, &tmp_scale_median, ScaleKernelInfo{ InterpolationPolicy::NEAREST_NEIGHBOR, BorderMode::REPLICATE });
- median.configure(&tmp_scale_median, &tmp_median_gauss, BorderMode::REPLICATE);
- gauss.configure(&tmp_median_gauss, &dst, BorderMode::REPLICATE);
-
- // Allocate all the images
- src.allocator()->allocate();
- dst.allocator()->allocate();
- tmp_scale_median.allocator()->allocate();
- tmp_median_gauss.allocator()->allocate();
-
- // Fill the input image with the content of the PPM image if a filename was provided:
- if(ppm.is_open())
- {
- ppm.fill_image(src);
- output_filename = std::string(argv[1]) + "_out.ppm";
- }
- /** [OpenCL events] **/
-
- return true;
- }
- void do_run() override
- {
- // Enqueue and flush the scale OpenCL kernel:
- scale.run();
- // Create a synchronisation event between scale and median:
- cl::Event scale_event = CLScheduler::get().enqueue_sync_event();
- // Enqueue and flush the median OpenCL kernel:
- median.run();
- // Enqueue and flush the Gaussian OpenCL kernel:
- gauss.run();
-
- //Make sure all the OpenCL jobs are done executing:
- scale_event.wait(); // Block until Scale is done executing (Median3x3 and Gaussian5x5 might still be running)
- CLScheduler::get().sync(); // Block until Gaussian5x5 is done executing
- }
- void do_teardown() override
- {
- // Save the result to file:
- if(!output_filename.empty())
- {
- save_to_ppm(dst, output_filename); // save_to_ppm maps and unmaps the image to store as PPM
- }
- }
-
-private:
- CLImage src{}, tmp_scale_median{}, tmp_median_gauss{}, dst{};
- CLScale scale{};
- CLMedian3x3 median{};
- CLGaussian5x5 gauss{};
- std::string output_filename{};
-};
-
-/** Main program for convolution test
- *
- * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Path to PPM image to process )
- */
-int main(int argc, char **argv)
-{
- return utils::run_example<CLEventsExample>(argc, argv);
-}
diff --git a/examples/neon_convolution.cpp b/examples/neon_convolution.cpp
deleted file mode 100644
index 0b33c76d51..0000000000
--- a/examples/neon_convolution.cpp
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
- * Copyright (c) 2016-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/NEON/NEFunctions.h"
-
-#include "arm_compute/core/Types.h"
-#include "utils/ImageLoader.h"
-#include "utils/Utils.h"
-
-using namespace arm_compute;
-using namespace utils;
-
-/** Gaussian 3x3 matrix
- */
-const std::array<int16_t, 9> gaussian3x3 =
-{
- 1, 2, 1,
- 2, 4, 2,
- 1, 2, 1
-};
-
-/** Gaussian 5x5 matrix
- */
-const std::array<int16_t, 25> gaussian5x5 =
-{
- 1, 4, 6, 4, 1,
- 4, 16, 24, 16, 4,
- 6, 24, 36, 24, 6,
- 4, 16, 24, 16, 4,
- 1, 4, 6, 4, 1
-};
-
-class NEONConvolutionExample : public Example
-{
-public:
- bool do_setup(int argc, char **argv) override
- {
- /** [Accurate padding] **/
- PPMLoader ppm;
-
- if(argc < 2)
- {
- // Print help
- std::cout << "Usage: ./build/neon_convolution [input_image.ppm]\n\n";
- std::cout << "No input_image provided, creating a dummy 640x480 image\n";
- // Initialize just the dimensions and format of your buffers:
- src.allocator()->init(TensorInfo(640, 480, Format::U8));
- }
- else
- {
- ppm.open(argv[1]);
- // Initialize just the dimensions and format of your buffers:
- ppm.init_image(src, Format::U8);
- }
-
- // Initialize just the dimensions and format of the temporary and destination images:
- tmp.allocator()->init(*src.info());
- dst.allocator()->init(*src.info());
-
- // Apply a Gaussian 3x3 filter to the source image followed by a Gaussian 5x5:
- // The function will automatically update the padding information inside input and output to match its requirements
- conv3x3.configure(&src, &tmp, gaussian3x3.data(), 0 /* Let arm_compute calculate the scale */, BorderMode::UNDEFINED);
- conv5x5.configure(&tmp, &dst, gaussian5x5.data(), 0 /* Let arm_compute calculate the scale */, BorderMode::UNDEFINED);
-
- // Now that the padding requirements are known we can allocate the images:
- src.allocator()->allocate();
- tmp.allocator()->allocate();
- dst.allocator()->allocate();
-
- // Fill the input image with the content of the PPM image if a filename was provided:
- if(ppm.is_open())
- {
- ppm.fill_image(src);
- output_filename = std::string(argv[1]) + "_out.ppm";
- }
- /** [Accurate padding] **/
-
- return true;
- }
- void do_run() override
- {
- //Execute the functions:
- conv3x3.run();
- conv5x5.run();
- }
- void do_teardown() override
- {
- // Save the result to file:
- if(!output_filename.empty())
- {
- save_to_ppm(dst, output_filename); // save_to_ppm maps and unmaps the image to store as PPM
- }
- }
-
-private:
- Image src{}, tmp{}, dst{};
- NEConvolution3x3 conv3x3{};
- NEConvolution5x5 conv5x5{};
- std::string output_filename{};
-};
-
-/** Main program for convolution test
- *
- * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Path to PPM image to process )
- */
-int main(int argc, char **argv)
-{
- return utils::run_example<NEONConvolutionExample>(argc, argv);
-}
diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp
index 75f76ea344..14d3a2cad5 100644
--- a/src/core/CL/CLKernelLibrary.cpp
+++ b/src/core/CL/CLKernelLibrary.cpp
@@ -177,10 +177,6 @@ std::string decompress_zlib(const std::string &str)
using namespace arm_compute;
const std::map<std::string, std::string> CLKernelLibrary::_kernel_program_map =
{
- { "absdiff", "absdiff.cl" },
- { "accumulate", "accumulate.cl" },
- { "accumulate_squared", "accumulate.cl" },
- { "accumulate_weighted", "accumulate.cl" },
{ "activation_layer", "activation_layer.cl" },
{ "activation_layer_quant", "activation_layer_quant.cl" },
{ "activation_layer_quant_f32", "activation_layer_quant.cl" },
@@ -200,21 +196,8 @@ const std::map<std::string, std::string> CLKernelLibrary::_kernel_program_map =
{ "bitwise_not", "bitwise_op.cl" },
{ "bounding_box_transform", "bounding_box_transform.cl" },
{ "bounding_box_transform_quantized", "bounding_box_transform_quantized.cl" },
- { "channel_combine_NV", "channel_combine.cl" },
- { "channel_combine_RGB888", "channel_combine.cl" },
- { "channel_combine_RGBA8888", "channel_combine.cl" },
- { "channel_combine_UYVY422", "channel_combine.cl" },
- { "channel_combine_YUYV422", "channel_combine.cl" },
{ "channel_shuffle_nchw", "channel_shuffle.cl" },
{ "channel_shuffle_nhwc", "channel_shuffle.cl" },
- { "channel_extract_NV12", "channel_extract.cl" },
- { "channel_extract_NV21", "channel_extract.cl" },
- { "channel_extract_RGB888", "channel_extract.cl" },
- { "channel_extract_RGBA8888", "channel_extract.cl" },
- { "channel_extract_UYVY422", "channel_extract.cl" },
- { "channel_extract_YUYV422", "channel_extract.cl" },
- { "combine_gradients_L1", "canny.cl" },
- { "combine_gradients_L2", "canny.cl" },
{ "compare_equal", "comparisons.cl" },
{ "compare_equal_quantized", "comparisons.cl" },
{ "compare_notequal", "comparisons.cl" },
@@ -232,25 +215,11 @@ const std::map<std::string, std::string> CLKernelLibrary::_kernel_program_map =
{ "concatenate_height", "concatenate.cl" },
{ "concatenate_width_x2", "concatenate.cl" },
{ "concatenate_width_x4", "concatenate.cl" },
- { "convolution_rectangle", "convolution_rectangle.cl" },
{ "col2im", "col2im.cl" },
{ "convert_depth_down", "depth_convert.cl" },
{ "convert_depth_up", "depth_convert.cl" },
{ "convert_fc_weights", "convert_fc_weights.cl" },
- { "convolution3x3_static", "convolution3x3.cl" },
- { "convolution5x5_static", "convolution5x5.cl" },
- { "convolution7x7_static", "convolution7x7.cl" },
- { "convolution9x9_static", "convolution9x9.cl" },
- { "convolution_separable1x5_static", "convolution5x5.cl" },
- { "convolution_separable5x1_static", "convolution5x5.cl" },
- { "convolution_separable1x7_static", "convolution7x7.cl" },
- { "convolution_separable7x1_static", "convolution7x7.cl" },
- { "convolution_separable1x9_static", "convolution9x9.cl" },
- { "convolution_separable9x1_static", "convolution9x9.cl" },
{ "copy_tensor", "copy_tensor.cl" },
- { "copy_plane", "channel_extract.cl" },
- { "copy_planes_3p", "channel_combine.cl" },
- { "copy_to_keypoint", "fast_corners.cl" },
{ "crop_tensor", "crop_tensor.cl" },
{ "deconvolution_reshape", "deconvolution_layer.cl" },
{ "deconvolution_upsample", "deconvolution_layer.cl" },
@@ -275,8 +244,6 @@ const std::map<std::string, std::string> CLKernelLibrary::_kernel_program_map =
{ "dequantization_layer", "dequantization_layer.cl" },
{ "dequantization_layer_per_channel_nhwc", "dequantization_layer.cl" },
{ "dequantization_layer_per_channel_nchw", "dequantization_layer.cl" },
- { "derivative", "derivative.cl" },
- { "dilate", "dilate.cl" },
{ "direct_convolution_nhwc", "direct_convolution.cl" },
{ "direct_convolution1x1", "direct_convolution1x1.cl" },
{ "direct_convolution1x1_f32_bifrost", "direct_convolution1x1.cl" },
@@ -303,8 +270,6 @@ const std::map<std::string, std::string> CLKernelLibrary::_kernel_program_map =
{ "elementwise_operation_SQUARED_DIFF_quantized", "elementwise_operation_quantized.cl" },
{ "elementwise_operation_PRELU_quantized", "elementwise_operation_quantized.cl" },
{ "elementwise_unary", "elementwise_unary.cl" },
- { "erode", "erode.cl" },
- { "fast_corners", "fast_corners.cl" },
{ "fft_digit_reverse_axis_0", "fft_digit_reverse.cl" },
{ "fft_digit_reverse_axis_1", "fft_digit_reverse.cl" },
{ "fft_radix_2_first_stage_axis_0", "fft.cl" },
@@ -334,12 +299,9 @@ const std::map<std::string, std::string> CLKernelLibrary::_kernel_program_map =
{ "fft_scale_conj", "fft_scale.cl" },
{ "fill_image_borders_constant", "fill_border.cl" },
{ "fill_image_borders_replicate", "fill_border.cl" },
- { "finalize", "optical_flow_pyramid_lk.cl" },
{ "floor_layer", "floor.cl" },
{ "fuse_batchnormalization_layer", "batchnormalization_layer.cl" },
{ "gather", "gather.cl" },
- { "gaussian1x5_sub_x", "gaussian_pyramid.cl" },
- { "gaussian5x1_sub_y", "gaussian_pyramid.cl" },
{ "gemm_ma_f16", "gemm.cl" },
{ "gemm_ma_f32", "gemm.cl" },
{ "gemm_mv", "gemv.cl" },
@@ -384,17 +346,6 @@ const std::map<std::string, std::string> CLKernelLibrary::_kernel_program_map =
{ "gemmlowp_output_stage_quantize_down_float", "gemmlowp.cl" },
{ "generate_proposals_compute_all_anchors", "generate_proposals.cl" },
{ "generate_proposals_compute_all_anchors_quantized", "generate_proposals_quantized.cl" },
- { "harris_score_3x3", "harris_corners.cl" },
- { "harris_score_5x5", "harris_corners.cl" },
- { "harris_score_7x7", "harris_corners.cl" },
- { "hist_border_kernel", "histogram.cl" },
- { "hist_border_kernel_fixed", "histogram.cl" },
- { "hist_local_kernel", "histogram.cl" },
- { "hist_local_kernel_fixed", "histogram.cl" },
- { "hog_block_normalization", "hog.cl" },
- { "hog_detector", "hog.cl" },
- { "hog_orientation_binning", "hog.cl" },
- { "hysteresis", "canny.cl" },
{ "im2col1x1_stridex1_nchw", "im2col.cl" },
{ "im2col3x3_nchw", "im2col.cl" },
{ "im2col5x5_nchw", "im2col.cl" },
@@ -404,36 +355,14 @@ const std::map<std::string, std::string> CLKernelLibrary::_kernel_program_map =
{ "im2col3x3_nhwc", "im2col.cl" },
{ "im2col9x9_nhwc", "im2col.cl" },
{ "im2col_generic_nhwc", "im2col.cl" },
- { "init_level", "optical_flow_pyramid_lk.cl" },
- { "init_level_max", "optical_flow_pyramid_lk.cl" },
- { "init_level_max_initial_estimate", "optical_flow_pyramid_lk.cl" },
{ "instance_normalization", "instance_normalization.cl" },
- { "integral_horizontal", "integral_image.cl" },
- { "integral_vertical", "integral_image.cl" },
- { "IYUV_to_NV12_bt709", "color_convert.cl" },
- { "IYUV_to_RGB888_bt709", "color_convert.cl" },
- { "IYUV_to_RGBA8888_bt709", "color_convert.cl" },
- { "IYUV_to_YUV444_bt709", "color_convert.cl" },
{ "l2_normalize_x", "l2_normalize.cl" },
{ "l2_normalize_y", "l2_normalize.cl" },
{ "l2_normalize_z", "l2_normalize.cl" },
- { "lktracker_stage0", "optical_flow_pyramid_lk.cl" },
- { "lktracker_stage1", "optical_flow_pyramid_lk.cl" },
- { "magnitude_phase", "magnitude_phase.cl" },
{ "max_unpooling_layer_2", "unpooling_layer.cl" },
- { "mean_stddev_accumulate", "mean_stddev.cl" },
{ "mean_stddev_normalization", "mean_stddev_normalization.cl" },
{ "memset", "memset.cl" },
- { "minmax", "minmaxloc.cl" },
- { "minmax_border", "minmaxloc.cl" },
{ "minmax_layer", "minmax_layer.cl" },
- { "minmaxloc", "minmaxloc.cl" },
- { "non_linear_filter_box3x3", "non_linear_filter3x3.cl" },
- { "non_linear_filter_cross3x3", "non_linear_filter3x3.cl" },
- { "non_linear_filter_disk3x3", "non_linear_filter3x3.cl" },
- { "non_linear_filter_box5x5", "non_linear_filter5x5.cl" },
- { "non_linear_filter_cross5x5", "non_linear_filter5x5.cl" },
- { "non_linear_filter_disk5x5", "non_linear_filter5x5.cl" },
{ "non_max_suppression", "nonmax.cl" },
{ "normalization_layer_cross_map", "normalization_layer.cl" },
{ "normalization_layer_in_map_nchw", "normalization_layer.cl" },
@@ -442,14 +371,6 @@ const std::map<std::string, std::string> CLKernelLibrary::_kernel_program_map =
{ "normalize_planar_yuv_layer_nhwc", "normalize_planar_yuv_layer.cl" },
{ "normalize_planar_yuv_layer_q8_nchw", "normalize_planar_yuv_layer_quantized.cl" },
{ "normalize_planar_yuv_layer_q8_nhwc", "normalize_planar_yuv_layer_quantized.cl" },
- { "NV12_to_IYUV_bt709", "color_convert.cl" },
- { "NV12_to_RGB888_bt709", "color_convert.cl" },
- { "NV12_to_RGBA8888_bt709", "color_convert.cl" },
- { "NV12_to_YUV444_bt709", "color_convert.cl" },
- { "NV21_to_IYUV_bt709", "color_convert.cl" },
- { "NV21_to_RGB888_bt709", "color_convert.cl" },
- { "NV21_to_RGBA8888_bt709", "color_convert.cl" },
- { "NV21_to_YUV444_bt709", "color_convert.cl" },
{ "pad_layer_constant", "pad_layer.cl" },
{ "pad_layer_symmetric_reflect", "pad_layer.cl" },
{ "permute", "permute.cl" },
@@ -485,15 +406,6 @@ const std::map<std::string, std::string> CLKernelLibrary::_kernel_program_map =
{ "reshape_layer", "reshape_layer.cl" },
{ "reshape_to_columns", "convolution_layer.cl" },
{ "reverse", "reverse.cl" },
- { "RGB888_to_IYUV_bt709", "color_convert.cl" },
- { "RGB888_to_NV12_bt709", "color_convert.cl" },
- { "RGB888_to_RGBA8888_bt709", "color_convert.cl" },
- { "RGB888_to_U8_bt709", "color_convert.cl" },
- { "RGB888_to_YUV444_bt709", "color_convert.cl" },
- { "RGBA8888_to_IYUV_bt709", "color_convert.cl" },
- { "RGBA8888_to_NV12_bt709", "color_convert.cl" },
- { "RGBA8888_to_RGB888_bt709", "color_convert.cl" },
- { "RGBA8888_to_YUV444_bt709", "color_convert.cl" },
{ "roi_align_layer", "roi_align_layer.cl" },
{ "roi_align_layer_quantized", "roi_align_layer_quantized.cl" },
{ "roi_pooling_layer", "roi_pooling_layer.cl" },
@@ -503,15 +415,9 @@ const std::map<std::string, std::string> CLKernelLibrary::_kernel_program_map =
{ "scale_bilinear_nhwc", "scale.cl" },
{ "scale_bilinear_quantized_nchw", "scale_quantized.cl" },
{ "scale_bilinear_quantized_nhwc", "scale_quantized.cl" },
- { "scharr3x3", "scharr_filter.cl" },
{ "select_same_rank", "select.cl" },
{ "select_different_rank_2", "select.cl" },
{ "select_different_rank_n", "select.cl" },
- { "sobel3x3", "sobel_filter.cl" },
- { "sobel_separable5x1", "sobel_filter.cl" },
- { "sobel_separable1x5", "sobel_filter.cl" },
- { "sobel_separable7x1", "sobel_filter.cl" },
- { "sobel_separable1x7", "sobel_filter.cl" },
{ "softmax_layer_norm", "softmax_layer.cl" },
{ "softmax_layer_norm_quantized", "softmax_layer_quantized.cl" },
{ "softmax_layer_max_shift_exp_sum_quantized_serial", "softmax_layer_quantized.cl" },
@@ -526,23 +432,10 @@ const std::map<std::string, std::string> CLKernelLibrary::_kernel_program_map =
{ "softmax_layer_max_shift_exp_sum_parallel", "softmax_layer.cl" },
{ "stack_layer", "stack_layer.cl" },
{ "strided_slice", "slice_ops.cl" },
- { "suppress_non_maximum", "canny.cl" },
- { "tablelookup_U8", "tablelookup.cl" },
- { "tablelookup_S16", "tablelookup.cl" },
- { "threshold_binary", "threshold.cl" },
- { "threshold_range", "threshold.cl" },
{ "tile", "tile.cl" },
{ "transpose", "transpose.cl" },
- { "UYVY422_to_IYUV_bt709", "color_convert.cl" },
- { "UYVY422_to_NV12_bt709", "color_convert.cl" },
- { "UYVY422_to_RGB888_bt709", "color_convert.cl" },
- { "UYVY422_to_RGBA8888_bt709", "color_convert.cl" },
{ "upsample_layer_nchw", "upsample_layer.cl" },
{ "upsample_layer_nhwc", "upsample_layer.cl" },
- { "warp_affine_nearest_neighbour", "warp_affine.cl" },
- { "warp_affine_bilinear", "warp_affine.cl" },
- { "warp_perspective_nearest_neighbour", "warp_perspective.cl" },
- { "warp_perspective_bilinear", "warp_perspective.cl" },
{ "winograd_filter_transform_2x2_3x3_nchw", "winograd_filter_transform.cl" },
{ "winograd_filter_transform_2x1_3x1_nchw", "winograd_filter_transform.cl" },
{ "winograd_filter_transform_1x2_1x3_nchw", "winograd_filter_transform.cl" },
@@ -602,24 +495,12 @@ const std::map<std::string, std::string> CLKernelLibrary::_kernel_program_map =
{ "winograd_output_transform_1x2_1x7_nhwc", "winograd_output_transform.cl" },
{ "yolo_layer_nchw", "yolo_layer.cl" },
{ "yolo_layer_nhwc", "yolo_layer.cl" },
- { "YUYV422_to_IYUV_bt709", "color_convert.cl" },
- { "YUYV422_to_NV12_bt709", "color_convert.cl" },
- { "YUYV422_to_RGB888_bt709", "color_convert.cl" },
- { "YUYV422_to_RGBA8888_bt709", "color_convert.cl" },
};
const std::map<std::string, std::string> CLKernelLibrary::_program_source_map =
{
#ifdef EMBEDDED_KERNELS
{
- "absdiff.cl",
-#include "./cl_kernels/absdiff.clembed"
- },
- {
- "accumulate.cl",
-#include "./cl_kernels/accumulate.clembed"
- },
- {
"activation_layer.cl",
#include "./cl_kernels/activation_layer.clembed"
},
@@ -648,18 +529,6 @@ const std::map<std::string, std::string> CLKernelLibrary::_program_source_map =
#include "./cl_kernels/bounding_box_transform_quantized.clembed"
},
{
- "canny.cl",
-#include "./cl_kernels/canny.clembed"
- },
- {
- "channel_combine.cl",
-#include "./cl_kernels/channel_combine.clembed"
- },
- {
- "channel_extract.cl",
-#include "./cl_kernels/channel_extract.clembed"
- },
- {
"channel_shuffle.cl",
#include "./cl_kernels/channel_shuffle.clembed"
},
@@ -676,38 +545,14 @@ const std::map<std::string, std::string> CLKernelLibrary::_program_source_map =
#include "./cl_kernels/concatenate.clembed"
},
{
- "color_convert.cl",
-#include "./cl_kernels/color_convert.clembed"
- },
- {
"convert_fc_weights.cl",
#include "./cl_kernels/convert_fc_weights.clembed"
- },
- {
- "convolution3x3.cl",
-#include "./cl_kernels/convolution3x3.clembed"
- },
- {
- "convolution5x5.cl",
-#include "./cl_kernels/convolution5x5.clembed"
- },
- {
- "convolution7x7.cl",
-#include "./cl_kernels/convolution7x7.clembed"
- },
- {
- "convolution9x9.cl",
-#include "./cl_kernels/convolution9x9.clembed"
- },
+ },
{
"convolution_layer.cl",
#include "./cl_kernels/convolution_layer.clembed"
},
{
- "convolution_rectangle.cl",
-#include "./cl_kernels/convolution_rectangle.clembed"
- },
- {
"copy_tensor.cl",
#include "./cl_kernels/copy_tensor.clembed"
},
@@ -744,14 +589,6 @@ const std::map<std::string, std::string> CLKernelLibrary::_program_source_map =
#include "./cl_kernels/dequantization_layer.clembed"
},
{
- "derivative.cl",
-#include "./cl_kernels/derivative.clembed"
- },
- {
- "dilate.cl",
-#include "./cl_kernels/dilate.clembed"
- },
- {
"direct_convolution1x1.cl",
#include "./cl_kernels/direct_convolution1x1.clembed"
},
@@ -784,14 +621,6 @@ const std::map<std::string, std::string> CLKernelLibrary::_program_source_map =
#include "./cl_kernels/elementwise_unary.clembed"
},
{
- "erode.cl",
-#include "./cl_kernels/erode.clembed"
- },
- {
- "fast_corners.cl",
-#include "./cl_kernels/fast_corners.clembed"
- },
- {
"fft.cl",
#include "./cl_kernels/fft.clembed"
},
@@ -816,10 +645,6 @@ const std::map<std::string, std::string> CLKernelLibrary::_program_source_map =
#include "./cl_kernels/gather.clembed"
},
{
- "gaussian_pyramid.cl",
-#include "./cl_kernels/gaussian_pyramid.clembed"
- },
- {
"gemm.cl",
#include "./cl_kernels/gemm.clembed"
},
@@ -844,10 +669,6 @@ const std::map<std::string, std::string> CLKernelLibrary::_program_source_map =
#include "./cl_kernels/generate_proposals_quantized.clembed"
},
{
- "harris_corners.cl",
-#include "./cl_kernels/harris_corners.clembed"
- },
- {
"helpers.h",
#include "./cl_kernels/helpers.hembed"
},
@@ -856,14 +677,6 @@ const std::map<std::string, std::string> CLKernelLibrary::_program_source_map =
#include "./cl_kernels/helpers_asymm.hembed"
},
{
- "histogram.cl",
-#include "./cl_kernels/histogram.clembed"
- },
- {
- "hog.cl",
-#include "./cl_kernels/hog.clembed"
- },
- {
"im2col.cl",
#include "./cl_kernels/im2col.clembed"
},
@@ -872,22 +685,10 @@ const std::map<std::string, std::string> CLKernelLibrary::_program_source_map =
#include "./cl_kernels/instance_normalization.clembed"
},
{
- "integral_image.cl",
-#include "./cl_kernels/integral_image.clembed"
- },
- {
"l2_normalize.cl",
#include "./cl_kernels/l2_normalize.clembed"
},
{
- "magnitude_phase.cl",
-#include "./cl_kernels/magnitude_phase.clembed"
- },
- {
- "mean_stddev.cl",
-#include "./cl_kernels/mean_stddev.clembed"
- },
- {
"mean_stddev_normalization.cl",
#include "./cl_kernels/mean_stddev_normalization.clembed"
},
@@ -896,26 +697,10 @@ const std::map<std::string, std::string> CLKernelLibrary::_program_source_map =
#include "./cl_kernels/memset.clembed"
},
{
- "minmaxloc.cl",
-#include "./cl_kernels/minmaxloc.clembed"
- },
- {
"minmax_layer.cl",
#include "./cl_kernels/minmax_layer.clembed"
},
{
- "non_linear_filter3x3.cl",
-#include "./cl_kernels/non_linear_filter3x3.clembed"
- },
- {
- "non_linear_filter5x5.cl",
-#include "./cl_kernels/non_linear_filter5x5.clembed"
- },
- {
- "non_linear_filter_helpers.h",
-#include "./cl_kernels/non_linear_filter_helpers.hembed"
- },
- {
"nonmax.cl",
#include "./cl_kernels/nonmax.clembed"
},
@@ -936,10 +721,6 @@ const std::map<std::string, std::string> CLKernelLibrary::_program_source_map =
#include "./cl_kernels/batchnormalization_layer.clembed"
},
{
- "optical_flow_pyramid_lk.cl",
-#include "./cl_kernels/optical_flow_pyramid_lk.clembed"
- },
- {
"pad_layer.cl",
#include "./cl_kernels/pad_layer.clembed"
},
@@ -1020,18 +801,10 @@ const std::map<std::string, std::string> CLKernelLibrary::_program_source_map =
#include "./cl_kernels/scale_quantized.clembed"
},
{
- "scharr_filter.cl",
-#include "./cl_kernels/scharr_filter.clembed"
- },
- {
"select.cl",
#include "./cl_kernels/select.clembed"
},
{
- "sobel_filter.cl",
-#include "./cl_kernels/sobel_filter.clembed"
- },
- {
"softmax_layer.cl",
#include "./cl_kernels/softmax_layer.clembed"
},
@@ -1056,14 +829,6 @@ const std::map<std::string, std::string> CLKernelLibrary::_program_source_map =
#include "./cl_kernels/stack_layer.clembed"
},
{
- "tablelookup.cl",
-#include "./cl_kernels/tablelookup.clembed"
- },
- {
- "threshold.cl",
-#include "./cl_kernels/threshold.clembed"
- },
- {
"tile.cl",
#include "./cl_kernels/tile.clembed"
},
@@ -1080,18 +845,6 @@ const std::map<std::string, std::string> CLKernelLibrary::_program_source_map =
#include "./cl_kernels/unpooling_layer.clembed"
},
{
- "warp_affine.cl",
-#include "./cl_kernels/warp_affine.clembed"
- },
- {
- "warp_helpers.h",
-#include "./cl_kernels/warp_helpers.hembed"
- },
- {
- "warp_perspective.cl",
-#include "./cl_kernels/warp_perspective.clembed"
- },
- {
"winograd_filter_transform.cl",
#include "./cl_kernels/winograd_filter_transform.clembed"
},
diff --git a/src/core/CL/CLKernels.h b/src/core/CL/CLKernels.h
index 7383dce40f..22c9cd9c0c 100644
--- a/src/core/CL/CLKernels.h
+++ b/src/core/CL/CLKernels.h
@@ -25,23 +25,15 @@
#define ARM_COMPUTE_CLKERNELS_H
/* Header regrouping all the CL kernels */
-#include "src/core/CL/kernels/CLAbsoluteDifferenceKernel.h"
-#include "src/core/CL/kernels/CLAccumulateKernel.h"
#include "src/core/CL/kernels/CLArgMinMaxLayerKernel.h"
#include "src/core/CL/kernels/CLBatchNormalizationLayerKernel.h"
#include "src/core/CL/kernels/CLBatchToSpaceLayerKernel.h"
#include "src/core/CL/kernels/CLBitwiseKernel.h"
#include "src/core/CL/kernels/CLBoundingBoxTransformKernel.h"
-#include "src/core/CL/kernels/CLBox3x3Kernel.h"
-#include "src/core/CL/kernels/CLCannyEdgeKernel.h"
-#include "src/core/CL/kernels/CLChannelCombineKernel.h"
-#include "src/core/CL/kernels/CLChannelExtractKernel.h"
#include "src/core/CL/kernels/CLChannelShuffleLayerKernel.h"
#include "src/core/CL/kernels/CLCol2ImKernel.h"
-#include "src/core/CL/kernels/CLColorConvertKernel.h"
#include "src/core/CL/kernels/CLComparisonKernel.h"
#include "src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h"
-#include "src/core/CL/kernels/CLConvolutionKernel.h"
#include "src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h"
#include "src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h"
#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
@@ -51,14 +43,10 @@
#include "src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h"
#include "src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h"
#include "src/core/CL/kernels/CLDequantizationLayerKernel.h"
-#include "src/core/CL/kernels/CLDerivativeKernel.h"
-#include "src/core/CL/kernels/CLDilateKernel.h"
#include "src/core/CL/kernels/CLDirectConvolutionLayerKernel.h"
-#include "src/core/CL/kernels/CLErodeKernel.h"
#include "src/core/CL/kernels/CLFFTDigitReverseKernel.h"
#include "src/core/CL/kernels/CLFFTRadixStageKernel.h"
#include "src/core/CL/kernels/CLFFTScaleKernel.h"
-#include "src/core/CL/kernels/CLFastCornersKernel.h"
#include "src/core/CL/kernels/CLFillBorderKernel.h"
#include "src/core/CL/kernels/CLFuseBatchNormalizationKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
@@ -77,28 +65,14 @@
#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "src/core/CL/kernels/CLGatherKernel.h"
-#include "src/core/CL/kernels/CLGaussian3x3Kernel.h"
-#include "src/core/CL/kernels/CLGaussian5x5Kernel.h"
-#include "src/core/CL/kernels/CLGaussianPyramidKernel.h"
#include "src/core/CL/kernels/CLGenerateProposalsLayerKernel.h"
-#include "src/core/CL/kernels/CLHOGDescriptorKernel.h"
-#include "src/core/CL/kernels/CLHOGDetectorKernel.h"
-#include "src/core/CL/kernels/CLHarrisCornersKernel.h"
-#include "src/core/CL/kernels/CLHistogramKernel.h"
#include "src/core/CL/kernels/CLIm2ColKernel.h"
#include "src/core/CL/kernels/CLInstanceNormalizationLayerKernel.h"
-#include "src/core/CL/kernels/CLIntegralImageKernel.h"
#include "src/core/CL/kernels/CLL2NormalizeLayerKernel.h"
#include "src/core/CL/kernels/CLLKTrackerKernel.h"
-#include "src/core/CL/kernels/CLMagnitudePhaseKernel.h"
#include "src/core/CL/kernels/CLMaxUnpoolingLayerKernel.h"
-#include "src/core/CL/kernels/CLMeanStdDevKernel.h"
#include "src/core/CL/kernels/CLMeanStdDevNormalizationKernel.h"
-#include "src/core/CL/kernels/CLMedian3x3Kernel.h"
#include "src/core/CL/kernels/CLMinMaxLayerKernel.h"
-#include "src/core/CL/kernels/CLMinMaxLocationKernel.h"
-#include "src/core/CL/kernels/CLNonLinearFilterKernel.h"
-#include "src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h"
#include "src/core/CL/kernels/CLNormalizationLayerKernel.h"
#include "src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h"
#include "src/core/CL/kernels/CLPadLayerKernel.h"
@@ -114,22 +88,14 @@
#include "src/core/CL/kernels/CLReorgLayerKernel.h"
#include "src/core/CL/kernels/CLReverseKernel.h"
#include "src/core/CL/kernels/CLScaleKernel.h"
-#include "src/core/CL/kernels/CLScharr3x3Kernel.h"
#include "src/core/CL/kernels/CLSelectKernel.h"
-#include "src/core/CL/kernels/CLSobel3x3Kernel.h"
-#include "src/core/CL/kernels/CLSobel5x5Kernel.h"
-#include "src/core/CL/kernels/CLSobel7x7Kernel.h"
#include "src/core/CL/kernels/CLSoftmaxLayerKernel.h"
#include "src/core/CL/kernels/CLSpaceToBatchLayerKernel.h"
#include "src/core/CL/kernels/CLSpaceToDepthLayerKernel.h"
#include "src/core/CL/kernels/CLStackLayerKernel.h"
#include "src/core/CL/kernels/CLStridedSliceKernel.h"
-#include "src/core/CL/kernels/CLTableLookupKernel.h"
-#include "src/core/CL/kernels/CLThresholdKernel.h"
#include "src/core/CL/kernels/CLTileKernel.h"
#include "src/core/CL/kernels/CLTransposeKernel.h"
-#include "src/core/CL/kernels/CLWarpAffineKernel.h"
-#include "src/core/CL/kernels/CLWarpPerspectiveKernel.h"
#include "src/core/CL/kernels/CLWeightsReshapeKernel.h"
#include "src/core/CL/kernels/CLWinogradFilterTransformKernel.h"
#include "src/core/CL/kernels/CLWinogradInputTransformKernel.h"
diff --git a/src/core/CL/cl_kernels/absdiff.cl b/src/core/CL/cl_kernels/absdiff.cl
deleted file mode 100644
index a09caf5dc5..0000000000
--- a/src/core/CL/cl_kernels/absdiff.cl
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Copyright (c) 2016, 2017 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "helpers.h"
-
-/** Calculate the absolute difference of two input images.
- *
- * @attention The input and output data types need to be passed at compile time using -DDATA_TYPE_IN1, -DDATA_TYPE_IN2 and -DDATA_TYPE_OUT:\n
- * e.g. -DDATA_TYPE_IN1=uchar -DDATA_TYPE_IN2=uchar -DDATA_TYPE_OUT=short
- *
- * @param[in] in1_ptr Pointer to the first source image. Supported data types: U8, S16
- * @param[in] in1_stride_x Stride of the first source image in X dimension (in bytes)
- * @param[in] in1_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] in1_stride_y Stride of the first source image in Y dimension (in bytes)
- * @param[in] in1_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] in1_offset_first_element_in_bytes The offset of the first element in the first source image
- * @param[in] in2_ptr Pointer to the second source image. Supported data types: U8, S16
- * @param[in] in2_stride_x Stride of the second source image in X dimension (in bytes)
- * @param[in] in2_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] in2_stride_y Stride of the second source image in Y dimension (in bytes)
- * @param[in] in2_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] in2_offset_first_element_in_bytes The offset of the first element in the second source image
- * @param[out] out_ptr Pointer to the destination image. Supported data types: U8, S16
- * @param[in] out_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] out_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] out_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] out_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] out_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void absdiff(
- IMAGE_DECLARATION(in1),
- IMAGE_DECLARATION(in2),
- IMAGE_DECLARATION(out))
-{
- Image in1 = CONVERT_TO_IMAGE_STRUCT(in1);
- Image in2 = CONVERT_TO_IMAGE_STRUCT(in2);
- Image out = CONVERT_TO_IMAGE_STRUCT(out);
-
- VEC_DATA_TYPE(DATA_TYPE_OUT, 16)
- in_a = CONVERT(vload16(0, (__global DATA_TYPE_IN1 *)in1.ptr), VEC_DATA_TYPE(DATA_TYPE_OUT, 16));
- VEC_DATA_TYPE(DATA_TYPE_OUT, 16)
- in_b = CONVERT(vload16(0, (__global DATA_TYPE_IN2 *)in2.ptr), VEC_DATA_TYPE(DATA_TYPE_OUT, 16));
-
- vstore16(CONVERT_SAT(abs_diff(in_a, in_b), VEC_DATA_TYPE(DATA_TYPE_OUT, 16)), 0, (__global DATA_TYPE_OUT *)out.ptr);
-}
diff --git a/src/core/CL/cl_kernels/accumulate.cl b/src/core/CL/cl_kernels/accumulate.cl
deleted file mode 100644
index 9e37830f1b..0000000000
--- a/src/core/CL/cl_kernels/accumulate.cl
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
- * Copyright (c) 2016, 2017 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "helpers.h"
-
-/** This function accumulates an input image into output image.
- *
- * @param[in] input_ptr Pointer to the source image. Supported data types: U8
- * @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] accu_ptr Pointer to the destination image. Supported data types: S16
- * @param[in] accu_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] accu_step_x accu_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] accu_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] accu_step_y accu_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] accu_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void accumulate(
- IMAGE_DECLARATION(input),
- IMAGE_DECLARATION(accu))
-{
- // Get pixels pointer
- Image input = CONVERT_TO_IMAGE_STRUCT(input);
- Image accu = CONVERT_TO_IMAGE_STRUCT(accu);
-
- // Load data
- uchar16 in_data = vload16(0, input.ptr);
- short16 accu_data = vload16(0, (__global short *)accu.ptr);
-
- // Perform accumulation
- short16 res = add_sat(convert_short16(in_data), accu_data);
-
- // Store result
- vstore16(res, 0, (__global short *)accu.ptr);
-}
-
-/** This function accumulates a weighted value from an input image to an output image.
- *
- * @param[in] input_ptr Pointer to the source image. Supported data types: U8
- * @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] accu_ptr Pointer to the destination image. Supported data types: S16
- * @param[in] accu_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] accu_step_x accu_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] accu_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] accu_step_y accu_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] accu_offset_first_element_in_bytes The offset of the first element in the destination image
- * @param[in] alpha The float scalar value with a value in the range of 0 to 1
- */
-__kernel void accumulate_weighted(
- IMAGE_DECLARATION(input),
- IMAGE_DECLARATION(accu),
- const float alpha)
-{
- // Get pixels pointer
- Image input = CONVERT_TO_IMAGE_STRUCT(input);
- Image accu = CONVERT_TO_IMAGE_STRUCT(accu);
-
- // Load data
- const float16 in_data = convert_float16(vload16(0, input.ptr));
- const float16 accu_data = convert_float16(vload16(0, accu.ptr));
-
- // Calculate weighted accumulation
- const uchar16 res = convert_uchar16((1.0f - alpha) * accu_data + alpha * in_data);
-
- // Store result
- vstore16(res, 0, accu.ptr);
-}
-
-/** This function accumulates a squared value from an input image to an output image.
- *
- * @param[in] input_ptr Pointer to the source image. Supported data types: U8
- * @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] accu_ptr Pointer to the destination image. Supported data types: S16
- * @param[in] accu_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] accu_step_x accu_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] accu_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] accu_step_y accu_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] accu_offset_first_element_in_bytes The offset of the first element in the destination image
- * @param[in] shift The U32 scalar value with a value in the range of 0 to 15
- */
-__kernel void accumulate_squared(
- IMAGE_DECLARATION(input),
- IMAGE_DECLARATION(accu),
- const uint shift)
-{
- // Get pixels pointer
- Image input = CONVERT_TO_IMAGE_STRUCT(input);
- Image accu = CONVERT_TO_IMAGE_STRUCT(accu);
-
- // Load data
- ushort16 in_data = convert_ushort16(vload16(0, input.ptr));
- uint16 accu_data = convert_uint16(vload16(0, (__global short *)accu.ptr));
-
- // Calculate squared accumulation
- short16 res = convert_short16_sat(accu_data + convert_uint16((in_data * in_data) >> shift));
-
- // Store result
- vstore16(res, 0, (__global short *)accu.ptr);
-}
diff --git a/src/core/CL/cl_kernels/canny.cl b/src/core/CL/cl_kernels/canny.cl
deleted file mode 100644
index bcff8438db..0000000000
--- a/src/core/CL/cl_kernels/canny.cl
+++ /dev/null
@@ -1,454 +0,0 @@
-/*
- * Copyright (c) 2017-2018 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "helpers.h"
-
-/** Calculate the magnitude and phase from horizontal and vertical result of sobel result.
- *
- * @note The calculation of gradient uses level 1 normalisation.
- * @attention The input and output data types need to be passed at compile time using -DDATA_TYPE_IN and -DDATA_TYPE_OUT:
- * e.g. -DDATA_TYPE_IN=uchar -DDATA_TYPE_OUT=short
- *
- * @param[in] src1_ptr Pointer to the source image (Vertical result of Sobel). Supported data types: S16, S32
- * @param[in] src1_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] src1_step_x src1_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src1_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] src1_step_y src1_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[in] src2_ptr Pointer to the source image (Vertical result of Sobel). Supported data types: S16, S32
- * @param[in] src2_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] src2_step_x src2_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src2_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] src2_step_y src2_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src2_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] grad_ptr Pointer to the gradient output. Supported data types: U16, U32
- * @param[in] grad_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] grad_step_x grad_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] grad_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] grad_step_y grad_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] grad_offset_first_element_in_bytes The offset of the first element of the output
- * @param[out] angle_ptr Pointer to the angle output. Supported data types: U8
- * @param[in] angle_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] angle_step_x angle_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] angle_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] angle_step_y angle_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] angle_offset_first_element_in_bytes The offset of the first element of the output
- */
-__kernel void combine_gradients_L1(
- IMAGE_DECLARATION(src1),
- IMAGE_DECLARATION(src2),
- IMAGE_DECLARATION(grad),
- IMAGE_DECLARATION(angle))
-{
- // Construct images
- Image src1 = CONVERT_TO_IMAGE_STRUCT(src1);
- Image src2 = CONVERT_TO_IMAGE_STRUCT(src2);
- Image grad = CONVERT_TO_IMAGE_STRUCT(grad);
- Image angle = CONVERT_TO_IMAGE_STRUCT(angle);
-
- // Load sobel horizontal and vertical values
- VEC_DATA_TYPE(DATA_TYPE_IN, 4)
- h = vload4(0, (__global DATA_TYPE_IN *)src1.ptr);
- VEC_DATA_TYPE(DATA_TYPE_IN, 4)
- v = vload4(0, (__global DATA_TYPE_IN *)src2.ptr);
-
- /* Calculate the gradient, using level 1 normalisation method */
- VEC_DATA_TYPE(DATA_TYPE_OUT, 4)
- m = CONVERT_SAT((abs(h) + abs(v)), VEC_DATA_TYPE(DATA_TYPE_OUT, 4));
-
- /* Calculate the angle */
- float4 p = 180.0f * atan2pi(convert_float4(v), convert_float4(h));
-
- /* Remap angle to range [0, 256) */
- p = select(p, p + 180.0f, p < 0.0f);
-
- /* Store results */
- vstore4(m, 0, (__global DATA_TYPE_OUT *)grad.ptr);
- vstore4(convert_uchar4_sat_rte(p), 0, angle.ptr);
-}
-
-/** Calculate the gradient and angle from horizontal and vertical result of sobel result.
- *
- * @note The calculation of gradient uses level 2 normalisation
- * @attention The input and output data types need to be passed at compile time using -DDATA_TYPE_IN and -DDATA_TYPE_OUT:
- * e.g. -DDATA_TYPE_IN=uchar -DDATA_TYPE_OUT=short
- *
- * @param[in] src1_ptr Pointer to the source image (Vertical result of Sobel). Supported data types: S16, S32
- * @param[in] src1_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] src1_step_x src1_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src1_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] src1_step_y src1_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[in] src2_ptr Pointer to the source image (Vertical result of Sobel). Supported data types: S16, S32
- * @param[in] src2_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] src2_step_x src2_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src2_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] src2_step_y src2_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src2_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] grad_ptr Pointer to the gradient output. Supported data types: U16, U32
- * @param[in] grad_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] grad_step_x grad_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] grad_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] grad_step_y grad_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] grad_offset_first_element_in_bytes The offset of the first element of the output
- * @param[out] angle_ptr Pointer to the angle output. Supported data types: U8
- * @param[in] angle_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] angle_step_x angle_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] angle_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] angle_step_y angle_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] angle_offset_first_element_in_bytes The offset of the first element of the output
- */
-__kernel void combine_gradients_L2(
- IMAGE_DECLARATION(src1),
- IMAGE_DECLARATION(src2),
- IMAGE_DECLARATION(grad),
- IMAGE_DECLARATION(angle))
-{
- // Construct images
- Image src1 = CONVERT_TO_IMAGE_STRUCT(src1);
- Image src2 = CONVERT_TO_IMAGE_STRUCT(src2);
- Image grad = CONVERT_TO_IMAGE_STRUCT(grad);
- Image angle = CONVERT_TO_IMAGE_STRUCT(angle);
-
- // Load sobel horizontal and vertical values
- float4 h = convert_float4(vload4(0, (__global DATA_TYPE_IN *)src1.ptr));
- float4 v = convert_float4(vload4(0, (__global DATA_TYPE_IN *)src2.ptr));
-
- /* Calculate the gradient, using level 2 normalisation method */
- float4 m = sqrt(h * h + v * v);
-
- /* Calculate the angle */
- float4 p = 180.0f * atan2pi(v, h);
-
- /* Remap angle to range [0, 256) */
- p = select(p, p + 180.0f, p < 0.0f);
-
- /* Store results */
- vstore4(CONVERT_SAT_ROUND(m, VEC_DATA_TYPE(DATA_TYPE_OUT, 4), rte), 0, (__global DATA_TYPE_OUT *)grad.ptr);
- vstore4(convert_uchar4_sat_rte(p), 0, angle.ptr);
-}
-
-#define EDGE 255
-#define NO_EDGE 0
-
-/** Array that holds the relative coordinates offset for the neighbouring pixels.
- */
-__constant short4 neighbours_coords[] =
-{
- { -1, 0, 1, 0 }, // 0
- { -1, -1, 1, 1 }, // 45
- { 0, -1, 0, 1 }, // 90
- { 1, -1, -1, 1 }, // 135
-};
-
-/** Perform non maximum suppression.
- *
- * @attention The input and output data types need to be passed at compile time using -DDATA_TYPE_IN and -DDATA_TYPE_OUT:
- * e.g. -DDATA_TYPE_IN=uchar -DDATA_TYPE_OUT=short
- *
- * @param[in] grad_ptr Pointer to the gradient output. Supported data types: S16, S32
- * @param[in] grad_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] grad_step_x grad_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] grad_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] grad_step_y grad_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] grad_offset_first_element_in_bytes The offset of the first element of the output
- * @param[in] angle_ptr Pointer to the angle output. Supported data types: U8
- * @param[in] angle_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] angle_step_x angle_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] angle_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] angle_step_y angle_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] angle_offset_first_element_in_bytes TThe offset of the first element of the output
- * @param[out] non_max_ptr Pointer to the non maximum suppressed output. Supported data types: U16, U32
- * @param[in] non_max_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] non_max_step_x non_max_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] non_max_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] non_max_step_y non_max_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] non_max_offset_first_element_in_bytes The offset of the first element of the output
- * @param[in] lower_thr The low threshold
- */
-__kernel void suppress_non_maximum(
- IMAGE_DECLARATION(grad),
- IMAGE_DECLARATION(angle),
- IMAGE_DECLARATION(non_max),
- uint lower_thr)
-{
- // Construct images
- Image grad = CONVERT_TO_IMAGE_STRUCT(grad);
- Image angle = CONVERT_TO_IMAGE_STRUCT(angle);
- Image non_max = CONVERT_TO_IMAGE_STRUCT(non_max);
-
- // Index
- const int x = get_global_id(0);
- const int y = get_global_id(1);
-
- // Get gradient and angle
- DATA_TYPE_IN gradient = *((__global DATA_TYPE_IN *)grad.ptr);
- uchar an = *((__global uchar *)angle.ptr);
-
- // Early return if not greater than lower threshold
- if(gradient <= lower_thr)
- {
- return;
- }
-
- // Divide the whole round into 4 directions
- DATA_TYPE_OUT q_an;
-
- if(an < 22.5f || an >= 157.5f)
- {
- q_an = 0;
- }
- else if(an < 67.5f)
- {
- q_an = 1;
- }
- else if(an < 112.5f)
- {
- q_an = 2;
- }
- else
- {
- q_an = 3;
- }
-
- // Find the two pixels in the perpendicular direction
- short2 x_p = neighbours_coords[q_an].s02;
- short2 y_p = neighbours_coords[q_an].s13;
- DATA_TYPE_IN g1 = *((global DATA_TYPE_IN *)offset(&grad, x_p.x, y_p.x));
- DATA_TYPE_IN g2 = *((global DATA_TYPE_IN *)offset(&grad, x_p.y, y_p.y));
-
- if((gradient > g1) && (gradient > g2))
- {
- __global uchar *non_max_addr = non_max_ptr + non_max_offset_first_element_in_bytes + x * non_max_stride_x + y * non_max_stride_y;
- *((global DATA_TYPE_OUT *)non_max_addr) = gradient;
- }
-}
-
-#define hysteresis_local_stack_L1 8 // The size of level 1 stack. This has to agree with the host side
-#define hysteresis_local_stack_L2 16 // The size of level 2 stack, adjust this can impact the match rate with VX implementation
-
-/** Check whether pixel is valid
- *
- * Skip the pixel if the early_test fails.
- * Otherwise, it tries to add the pixel coordinate to the stack, and proceed to popping the stack instead if the stack is full
- *
- * @param[in] early_test Boolean condition based on the minv check and visited buffer check
- * @param[in] x_pos X-coordinate of pixel that is going to be recorded, has to be within the boundary
- * @param[in] y_pos Y-coordinate of pixel that is going to be recorded, has to be within the boundary
- * @param[in] x_cur X-coordinate of current central pixel
- * @param[in] y_cur Y-coordinate of current central pixel
- */
-#define check_pixel(early_test, x_pos, y_pos, x_cur, y_cur) \
- { \
- if(!early_test) \
- { \
- /* Number of elements in the local stack 1, points to next available entry */ \
- c = *((__global char *)offset(&l1_stack_counter, x_cur, y_cur)); \
- \
- if(c > (hysteresis_local_stack_L1 - 1)) /* Stack level 1 is full */ \
- goto pop_stack; \
- \
- /* The pixel that has already been recorded is ignored */ \
- if(!atomic_or((__global uint *)offset(&recorded, x_pos, y_pos), 1)) \
- { \
- l1_ptr[c] = (short2)(x_pos, y_pos); \
- *((__global char *)offset(&l1_stack_counter, x_cur, y_cur)) += 1; \
- } \
- } \
- }
-
-/** Perform hysteresis.
- *
- * @attention The input data_type needs to be passed at compile time using -DDATA_TYPE_IN: e.g. -DDATA_TYPE_IN=short
- *
- * @param[in] src_ptr Pointer to the input image. Supported data types: U8
- * @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_offset_first_element_in_bytes The offset of the first element of the output
- * @param[out] out_ptr Pointer to the output image. Supported data types: U8
- * @param[in] out_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] out_step_x out_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] out_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] out_step_y out_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] out_offset_first_element_in_bytes The offset of the first element of the output
- * @param[out] visited_ptr Pointer to the visited buffer, where pixels are marked as visited. Supported data types: U32
- * @param[in] visited_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] visited_step_x visited_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] visited_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] visited_step_y visited_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] visited_offset_first_element_in_bytes The offset of the first element of the output
- * @param[out] recorded_ptr Pointer to the recorded buffer, where pixels are marked as recorded. Supported data types: U32
- * @param[in] recorded_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] recorded_step_x recorded_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] recorded_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] recorded_step_y recorded_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] recorded_offset_first_element_in_bytes The offset of the first element of the output
- * @param[out] l1_stack_ptr Pointer to the l1 stack of a pixel. Supported data types: S32
- * @param[in] l1_stack_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] l1_stack_step_x l1_stack_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] l1_stack_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] l1_stack_step_y l1_stack_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] l1_stack_offset_first_element_in_bytes The offset of the first element of the output
- * @param[out] l1_stack_counter_ptr Pointer to the l1 stack counters of an image. Supported data types: U8
- * @param[in] l1_stack_counter_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] l1_stack_counter_step_x l1_stack_counter_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] l1_stack_counter_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] l1_stack_counter_step_y l1_stack_counter_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] l1_stack_counter_offset_first_element_in_bytes The offset of the first element of the output
- * @param[in] low_thr The lower threshold
- * @param[in] up_thr The upper threshold
- * @param[in] width The width of the image.
- * @param[in] height The height of the image
- */
-kernel void hysteresis(
- IMAGE_DECLARATION(src),
- IMAGE_DECLARATION(out),
- IMAGE_DECLARATION(visited),
- IMAGE_DECLARATION(recorded),
- IMAGE_DECLARATION(l1_stack),
- IMAGE_DECLARATION(l1_stack_counter),
- uint low_thr,
- uint up_thr,
- int width,
- int height)
-{
- // Create images
- Image src = CONVERT_TO_IMAGE_STRUCT_NO_STEP(src);
- Image out = CONVERT_TO_IMAGE_STRUCT_NO_STEP(out);
- Image visited = CONVERT_TO_IMAGE_STRUCT_NO_STEP(visited);
- Image recorded = CONVERT_TO_IMAGE_STRUCT_NO_STEP(recorded);
- Image l1_stack = CONVERT_TO_IMAGE_STRUCT_NO_STEP(l1_stack);
- Image l1_stack_counter = CONVERT_TO_IMAGE_STRUCT_NO_STEP(l1_stack_counter);
-
- // Index
- int x = get_global_id(0);
- int y = get_global_id(1);
-
- // Load value
- DATA_TYPE_IN val = *((__global DATA_TYPE_IN *)offset(&src, x, y));
-
- // If the pixel has already been marked as NO_EDGE, store that value in the output and return
- if(val == NO_EDGE)
- {
- *offset(&out, x, y) = NO_EDGE;
- return;
- }
-
- // Return if it is a MAYBE pixel. Such pixels will become edges if near a strong edge
- if(val <= up_thr)
- {
- return;
- }
-
- // Init local stack 2
- short2 stack_L2[hysteresis_local_stack_L2] = { 0 };
- int L2_counter = 0;
-
- // Perform recursive hysteresis
- while(true)
- {
- // Get L1 stack pointer
- __global short2 *l1_ptr = (__global short2 *)(l1_stack.ptr + y * l1_stack.stride_y + x * hysteresis_local_stack_L1 * l1_stack.stride_x);
-
- // If the pixel has already been visited, proceed with the items in the stack instead
- if(atomic_or((__global uint *)offset(&visited, x, y), 1) != 0)
- {
- goto pop_stack;
- }
-
- // Set strong edge
- *offset(&out, x, y) = EDGE;
-
- // If it is the top of stack l2, we don't need check the surrounding pixels
- if(L2_counter > (hysteresis_local_stack_L2 - 1))
- {
- goto pop_stack2;
- }
-
- // Points to the start of the local stack;
- char c;
-
- VEC_DATA_TYPE(DATA_TYPE_IN, 4)
- x_tmp;
- uint4 v_tmp;
-
- // Get direction pixel indices
- int N = max(y - 1, 0), S = min(y + 1, height - 2), W = max(x - 1, 0), E = min(x + 1, width - 2);
-
- // Check 8 pixels around for weak edges where low_thr < val <= up_thr
- x_tmp = vload4(0, (__global DATA_TYPE_IN *)offset(&src, W, N));
- v_tmp = vload4(0, (__global uint *)offset(&visited, W, N));
- check_pixel(((x_tmp.s0 <= low_thr) || v_tmp.s0 || (x_tmp.s0 > up_thr)), W, N, x, y); // NW
- check_pixel(((x_tmp.s1 <= low_thr) || v_tmp.s1 || (x_tmp.s1 > up_thr)), x, N, x, y); // N
- check_pixel(((x_tmp.s2 <= low_thr) || v_tmp.s2 || (x_tmp.s2 > up_thr)), E, N, x, y); // NE
-
- x_tmp = vload4(0, (__global DATA_TYPE_IN *)offset(&src, W, y));
- v_tmp = vload4(0, (__global uint *)offset(&visited, W, y));
- check_pixel(((x_tmp.s0 <= low_thr) || v_tmp.s0 || (x_tmp.s0 > up_thr)), W, y, x, y); // W
- check_pixel(((x_tmp.s2 <= low_thr) || v_tmp.s2 || (x_tmp.s2 > up_thr)), E, y, x, y); // E
-
- x_tmp = vload4(0, (__global DATA_TYPE_IN *)offset(&src, W, S));
- v_tmp = vload4(0, (__global uint *)offset(&visited, W, S));
- check_pixel(((x_tmp.s0 <= low_thr) || v_tmp.s0 || (x_tmp.s0 > up_thr)), W, S, x, y); // SW
- check_pixel(((x_tmp.s1 <= low_thr) || v_tmp.s1 || (x_tmp.s1 > up_thr)), x, S, x, y); // S
- check_pixel(((x_tmp.s2 <= low_thr) || v_tmp.s2 || (x_tmp.s2 > up_thr)), E, S, x, y); // SE
-
-#undef check_pixel
-
-pop_stack:
- c = *((__global char *)offset(&l1_stack_counter, x, y));
-
- if(c >= 1)
- {
- *((__global char *)offset(&l1_stack_counter, x, y)) -= 1;
- int2 l_c = convert_int2(l1_ptr[c - 1]);
-
- // Push the current position into level 2 stack
- stack_L2[L2_counter].x = x;
- stack_L2[L2_counter].y = y;
-
- x = l_c.x;
- y = l_c.y;
-
- L2_counter++;
-
- continue;
- }
-
- if(L2_counter > 0)
- {
- goto pop_stack2;
- }
- else
- {
- return;
- }
-
-pop_stack2:
- L2_counter--;
- x = stack_L2[L2_counter].x;
- y = stack_L2[L2_counter].y;
- };
-}
diff --git a/src/core/CL/cl_kernels/channel_combine.cl b/src/core/CL/cl_kernels/channel_combine.cl
deleted file mode 100644
index 550d52e9ea..0000000000
--- a/src/core/CL/cl_kernels/channel_combine.cl
+++ /dev/null
@@ -1,416 +0,0 @@
-/*
- * Copyright (c) 2016-2018 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "helpers.h"
-
-/** This function combines three planes to a single RGB image.
- *
- * @param[in] plane0_ptr Pointer to the first plane. Supported Format: U8
- * @param[in] plane0_stride_x Stride of the first plane in X dimension (in bytes)
- * @param[in] plane0_step_x plane0_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] plane0_stride_y Stride of the first plane in Y dimension (in bytes)
- * @param[in] plane0_step_y plane0_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] plane0_offset_first_element_in_bytes The offset of the first element in the first plane
- * @param[in] plane1_ptr Pointer to the second plane. Supported Format: U8
- * @param[in] plane1_stride_x Stride of the second plane in X dimension (in bytes)
- * @param[in] plane1_step_x plane1_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] plane1_stride_y Stride of the second plane in Y dimension (in bytes)
- * @param[in] plane1_step_y plane1_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] plane1_offset_first_element_in_bytes The offset of the first element in the second plane
- * @param[in] plane2_ptr Pointer to the third plane. Supported Format: U8
- * @param[in] plane2_stride_x Stride of the third plane in X dimension (in bytes)
- * @param[in] plane2_step_x plane2_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] plane2_stride_y Stride of the third plane in Y dimension (in bytes)
- * @param[in] plane2_step_y plane2_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] plane2_offset_first_element_in_bytes The offset of the first element in the third plane
- * @param[in] dst_ptr Pointer to the destination image. Supported Format: RGB
- * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void channel_combine_RGB888(
- IMAGE_DECLARATION(plane0),
- IMAGE_DECLARATION(plane1),
- IMAGE_DECLARATION(plane2),
- IMAGE_DECLARATION(dst))
-{
- // Get pixels pointer
- Image plane0 = CONVERT_TO_IMAGE_STRUCT(plane0);
- Image plane1 = CONVERT_TO_IMAGE_STRUCT(plane1);
- Image plane2 = CONVERT_TO_IMAGE_STRUCT(plane2);
- Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
-
- uchar16 data0 = vload16(0, plane0.ptr);
- uchar16 data1 = vload16(0, plane1.ptr);
- uchar16 data2 = vload16(0, plane2.ptr);
-
- uchar16 out0 = (uchar16)(data0.s0, data1.s0, data2.s0,
- data0.s1, data1.s1, data2.s1,
- data0.s2, data1.s2, data2.s2,
- data0.s3, data1.s3, data2.s3,
- data0.s4, data1.s4, data2.s4,
- data0.s5);
- vstore16(out0, 0, dst.ptr);
-
- uchar16 out1 = (uchar16)(data1.s5, data2.s5, data0.s6,
- data1.s6, data2.s6, data0.s7,
- data1.s7, data2.s7, data0.s8,
- data1.s8, data2.s8, data0.s9,
- data1.s9, data2.s9, data0.sA,
- data1.sA);
- vstore16(out1, 0, dst.ptr + 16);
-
- uchar16 out2 = (uchar16)(data2.sA, data0.sB, data1.sB,
- data2.sB, data0.sC, data1.sC,
- data2.sC, data0.sD, data1.sD,
- data2.sD, data0.sE, data1.sE,
- data2.sE, data0.sF, data1.sF,
- data2.sF);
- vstore16(out2, 0, dst.ptr + 32);
-}
-
-/** This function combines three planes to a single RGBA image.
- *
- * @param[in] plane0_ptr Pointer to the first plane. Supported Format: U8
- * @param[in] plane0_stride_x Stride of the first plane in X dimension (in bytes)
- * @param[in] plane0_step_x plane0_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] plane0_stride_y Stride of the first plane in Y dimension (in bytes)
- * @param[in] plane0_step_y plane0_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] plane0_offset_first_element_in_bytes The offset of the first element in the first plane
- * @param[in] plane1_ptr Pointer to the second plane. Supported Format: U8
- * @param[in] plane1_stride_x Stride of the second plane in X dimension (in bytes)
- * @param[in] plane1_step_x plane1_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] plane1_stride_y Stride of the second plane in Y dimension (in bytes)
- * @param[in] plane1_step_y plane1_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] plane1_offset_first_element_in_bytes The offset of the first element in the second plane
- * @param[in] plane2_ptr Pointer to the third plane. Supported Format: U8
- * @param[in] plane2_stride_x Stride of the third plane in X dimension (in bytes)
- * @param[in] plane2_step_x plane2_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] plane2_stride_y Stride of the third plane in Y dimension (in bytes)
- * @param[in] plane2_step_y plane2_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] plane2_offset_first_element_in_bytes The offset of the first element in the third plane
- * @param[in] plane3_ptr Pointer to the fourth plane. Supported Format: U8
- * @param[in] plane3_stride_x Stride of the fourth plane in X dimension (in bytes)
- * @param[in] plane3_step_x plane3_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] plane3_stride_y Stride of the fourth plane in Y dimension (in bytes)
- * @param[in] plane3_step_y plane3_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] plane3_offset_first_element_in_bytes The offset of the first element in the fourth plane
- * @param[in] dst_ptr Pointer to the destination image. Supported Format: RGBA
- * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void channel_combine_RGBA8888(
- IMAGE_DECLARATION(plane0),
- IMAGE_DECLARATION(plane1),
- IMAGE_DECLARATION(plane2),
- IMAGE_DECLARATION(plane3),
- IMAGE_DECLARATION(dst))
-{
- // Get pixels pointer
- Image plane0 = CONVERT_TO_IMAGE_STRUCT(plane0);
- Image plane1 = CONVERT_TO_IMAGE_STRUCT(plane1);
- Image plane2 = CONVERT_TO_IMAGE_STRUCT(plane2);
- Image plane3 = CONVERT_TO_IMAGE_STRUCT(plane3);
- Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
-
- uchar16 data0 = vload16(0, plane0.ptr);
- uchar16 data1 = vload16(0, plane1.ptr);
- uchar16 data2 = vload16(0, plane2.ptr);
- uchar16 data3 = vload16(0, plane3.ptr);
-
- uchar16 out0 = (uchar16)(data0.s0, data1.s0, data2.s0, data3.s0,
- data0.s1, data1.s1, data2.s1, data3.s1,
- data0.s2, data1.s2, data2.s2, data3.s2,
- data0.s3, data1.s3, data2.s3, data3.s3);
- vstore16(out0, 0, dst.ptr);
-
- uchar16 out1 = (uchar16)(data0.s4, data1.s4, data2.s4, data3.s4,
- data0.s5, data1.s5, data2.s5, data3.s5,
- data0.s6, data1.s6, data2.s6, data3.s6,
- data0.s7, data1.s7, data2.s7, data3.s7);
- vstore16(out1, 0, dst.ptr + 16);
-
- uchar16 out2 = (uchar16)(data0.s8, data1.s8, data2.s8, data3.s8,
- data0.s9, data1.s9, data2.s9, data3.s9,
- data0.sA, data1.sA, data2.sA, data3.sA,
- data0.sB, data1.sB, data2.sB, data3.sB);
- vstore16(out2, 0, dst.ptr + 32);
-
- uchar16 out3 = (uchar16)(data0.sC, data1.sC, data2.sC, data3.sC,
- data0.sD, data1.sD, data2.sD, data3.sD,
- data0.sE, data1.sE, data2.sE, data3.sE,
- data0.sF, data1.sF, data2.sF, data3.sF);
- vstore16(out3, 0, dst.ptr + 48);
-}
-
-/** This function combines three planes to a single YUYV image.
- *
- * @param[in] plane0_ptr Pointer to the first plane. Supported Format: U8
- * @param[in] plane0_stride_x Stride of the first plane in X dimension (in bytes)
- * @param[in] plane0_step_x plane0_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] plane0_stride_y Stride of the first plane in Y dimension (in bytes)
- * @param[in] plane0_step_y plane0_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] plane0_offset_first_element_in_bytes The offset of the first element in the first plane
- * @param[in] plane1_ptr Pointer to the second plane. Supported Format: U8
- * @param[in] plane1_stride_x Stride of the second plane in X dimension (in bytes)
- * @param[in] plane1_step_x plane1_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] plane1_stride_y Stride of the second plane in Y dimension (in bytes)
- * @param[in] plane1_step_y plane1_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] plane1_offset_first_element_in_bytes The offset of the first element in the second plane
- * @param[in] plane2_ptr Pointer to the third plane. Supported Format: U8
- * @param[in] plane2_stride_x Stride of the third plane in X dimension (in bytes)
- * @param[in] plane2_step_x plane2_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] plane2_stride_y Stride of the third plane in Y dimension (in bytes)
- * @param[in] plane2_step_y plane2_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] plane2_offset_first_element_in_bytes The offset of the first element in the third plane
- * @param[in] dst_ptr Pointer to the destination image. Supported Format: YUYV
- * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void channel_combine_YUYV422(
- IMAGE_DECLARATION(plane0),
- IMAGE_DECLARATION(plane1),
- IMAGE_DECLARATION(plane2),
- IMAGE_DECLARATION(dst))
-{
- // Get pixels pointer
- Image plane0 = CONVERT_TO_IMAGE_STRUCT(plane0);
- Image plane1 = CONVERT_TO_IMAGE_STRUCT(plane1);
- Image plane2 = CONVERT_TO_IMAGE_STRUCT(plane2);
- Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
-
- uchar16 data0 = vload16(0, plane0.ptr);
- uchar8 data1 = vload8(0, plane1.ptr);
- uchar8 data2 = vload8(0, plane2.ptr);
-
- uchar16 out0 = (uchar16)(data0.s0, data1.s0, data0.s1, data2.s0,
- data0.s2, data1.s1, data0.s3, data2.s1,
- data0.s4, data1.s2, data0.s5, data2.s2,
- data0.s6, data1.s3, data0.s7, data2.s3);
- vstore16(out0, 0, dst.ptr);
- uchar16 out1 = (uchar16)(data0.s8, data1.s4, data0.s9, data2.s4,
- data0.sA, data1.s5, data0.sB, data2.s5,
- data0.sC, data1.s6, data0.sD, data2.s6,
- data0.sE, data1.s7, data0.sF, data2.s7);
- vstore16(out1, 0, dst.ptr + 16);
-}
-
-/** This function combines three planes to a single UYUV image.
- *
- * @param[in] plane0_ptr Pointer to the first plane. Supported Format: U8
- * @param[in] plane0_stride_x Stride of the first plane in X dimension (in bytes)
- * @param[in] plane0_step_x plane0_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] plane0_stride_y Stride of the first plane in Y dimension (in bytes)
- * @param[in] plane0_step_y plane0_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] plane0_offset_first_element_in_bytes The offset of the first element in the first plane
- * @param[in] plane1_ptr Pointer to the second plane. Supported Format: U8
- * @param[in] plane1_stride_x Stride of the second plane in X dimension (in bytes)
- * @param[in] plane1_step_x plane1_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] plane1_stride_y Stride of the second plane in Y dimension (in bytes)
- * @param[in] plane1_step_y plane1_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] plane1_offset_first_element_in_bytes The offset of the first element in the second plane
- * @param[in] plane2_ptr Pointer to the third plane. Supported Format: U8
- * @param[in] plane2_stride_x Stride of the third plane in X dimension (in bytes)
- * @param[in] plane2_step_x plane2_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] plane2_stride_y Stride of the third plane in Y dimension (in bytes)
- * @param[in] plane2_step_y plane2_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] plane2_offset_first_element_in_bytes The offset of the first element in the third plane
- * @param[in] dst_ptr Pointer to the destination image. Supported Format: UYUV
- * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void channel_combine_UYVY422(
- IMAGE_DECLARATION(plane0),
- IMAGE_DECLARATION(plane1),
- IMAGE_DECLARATION(plane2),
- IMAGE_DECLARATION(dst))
-{
- // Get pixels pointer
- Image plane0 = CONVERT_TO_IMAGE_STRUCT(plane0);
- Image plane1 = CONVERT_TO_IMAGE_STRUCT(plane1);
- Image plane2 = CONVERT_TO_IMAGE_STRUCT(plane2);
- Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
-
- uchar16 data0 = vload16(0, plane0.ptr);
- uchar8 data1 = vload8(0, plane1.ptr);
- uchar8 data2 = vload8(0, plane2.ptr);
-
- uchar16 out0 = (uchar16)(data1.s0, data0.s0, data2.s0, data0.s1,
- data1.s1, data0.s2, data2.s1, data0.s3,
- data1.s2, data0.s4, data2.s2, data0.s5,
- data1.s3, data0.s6, data2.s3, data0.s7);
- vstore16(out0, 0, dst.ptr);
- uchar16 out1 = (uchar16)(data1.s4, data0.s8, data2.s4, data0.s9,
- data1.s5, data0.sA, data2.s5, data0.sB,
- data1.s6, data0.sC, data2.s6, data0.sD,
- data1.s7, data0.sE, data2.s7, data0.sF);
- vstore16(out1, 0, dst.ptr + 16);
-}
-
-/** This function combines three planes to a single NV12/NV21 image.
- *
- * @note NV12 or NV21 has to be specified through preprocessor macro. eg. -DNV12 performs NV12 channel combine.
- *
- * @param[in] src_plane0_ptr Pointer to the first plane. Supported Format: U8
- * @param[in] src_plane0_stride_x Stride of the first plane in X dimension (in bytes)
- * @param[in] src_plane0_step_x src_plane0_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_plane0_stride_y Stride of the first plane in Y dimension (in bytes)
- * @param[in] src_plane0_step_y src_plane0_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_plane0_offset_first_element_in_bytes The offset of the first element in the first plane
- * @param[in] src_plane1_ptr Pointer to the second plane. Supported Format: U8
- * @param[in] src_plane1_stride_x Stride of the second plane in X dimension (in bytes)
- * @param[in] src_plane1_step_x src_plane1_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_plane1_stride_y Stride of the second plane in Y dimension (in bytes)
- * @param[in] src_plane1_step_y src_plane1_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_plane1_offset_first_element_in_bytes The offset of the first element in the second plane
- * @param[in] src_plane2_ptr Pointer to the third plane. Supported Format: U8
- * @param[in] src_plane2_stride_x Stride of the third plane in X dimension (in bytes)
- * @param[in] src_plane2_step_x src_plane2_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_plane2_stride_y Stride of the third plane in Y dimension (in bytes)
- * @param[in] src_plane2_step_y src_plane2_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_plane2_offset_first_element_in_bytes The offset of the first element in the third plane
- * @param[in] dst_plane0_ptr Pointer to the first plane of the destination image. Supported Format: U8
- * @param[in] dst_plane0_stride_x Stride of the first plane of the destination image in X dimension (in bytes)
- * @param[in] dst_plane0_step_x dst_plane0_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_plane0_stride_y Stride of the first plane of the destination image in Y dimension (in bytes)
- * @param[in] dst_plane0_step_y dst_plane0_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_plane0_offset_first_element_in_bytes The offset of the first element in the first plane of the destination image
- * @param[in] dst_plane1_ptr Pointer to the second plane of the destination image. Supported Format: UV88
- * @param[in] dst_plane1_stride_x Stride of the second plane of the destination image in X dimension (in bytes)
- * @param[in] dst_plane1_step_x dst_plane1_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_plane1_stride_y Stride of the second plane of the destination image in Y dimension (in bytes)
- * @param[in] dst_plane1_step_y dst_plane1_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_plane1_offset_first_element_in_bytes The offset of the first element in the second plane of the destination image
- * @param[in] height Sub-sampled height
- */
-__kernel void channel_combine_NV(
- IMAGE_DECLARATION(src_plane0),
- IMAGE_DECLARATION(src_plane1),
- IMAGE_DECLARATION(src_plane2),
- IMAGE_DECLARATION(dst_plane0),
- IMAGE_DECLARATION(dst_plane1),
- uint height)
-{
- // Get pixels pointer
- Image src_plane0 = CONVERT_TO_IMAGE_STRUCT(src_plane0);
- Image src_plane1 = CONVERT_TO_IMAGE_STRUCT(src_plane1);
- Image src_plane2 = CONVERT_TO_IMAGE_STRUCT(src_plane2);
- Image dst_plane0 = CONVERT_TO_IMAGE_STRUCT(dst_plane0);
- Image dst_plane1 = CONVERT_TO_IMAGE_STRUCT(dst_plane1);
-
- // Copy plane data
- vstore16(vload16(0, src_plane0.ptr), 0, dst_plane0.ptr);
- vstore16(vload16(0, offset(&src_plane0, 0, height)), 0, (__global uchar *)offset(&dst_plane0, 0, height));
-
- // Create UV place
- uchar8 data1 = vload8(0, src_plane1.ptr);
- uchar8 data2 = vload8(0, src_plane2.ptr);
-
-#ifdef NV12
- vstore16(shuffle2(data1, data2, (uchar16)(0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15)), 0, dst_plane1.ptr);
-#elif defined(NV21)
- vstore16(shuffle2(data2, data1, (uchar16)(0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15)), 0, dst_plane1.ptr);
-#endif /* NV12 or NV21 */
-}
-
-/** This function combines three planes to a single YUV444 or IYUV image.
- *
- * @note YUV444 or IYUV has to be specified through preprocessor macro. eg. -DIYUV performs IYUV channel combine.
- *
- * @param[in] src_plane0_ptr Pointer to the first plane. Supported Format: U8
- * @param[in] src_plane0_stride_x Stride of the first plane in X dimension (in bytes)
- * @param[in] src_plane0_step_x src_plane0_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_plane0_stride_y Stride of the first plane in Y dimension (in bytes)
- * @param[in] src_plane0_step_y src_plane0_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_plane0_offset_first_element_in_bytes The offset of the first element in the first plane
- * @param[in] src_plane1_ptr Pointer to the second plane. Supported Format: U8
- * @param[in] src_plane1_stride_x Stride of the second plane in X dimension (in bytes)
- * @param[in] src_plane1_step_x src_plane1_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_plane1_stride_y Stride of the second plane in Y dimension (in bytes)
- * @param[in] src_plane1_step_y src_plane1_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_plane1_offset_first_element_in_bytes The offset of the first element in the second plane
- * @param[in] src_plane2_ptr Pointer to the third plane. Supported Format: U8
- * @param[in] src_plane2_stride_x Stride of the third plane in X dimension (in bytes)
- * @param[in] src_plane2_step_x src_plane2_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_plane2_stride_y Stride of the third plane in Y dimension (in bytes)
- * @param[in] src_plane2_step_y src_plane2_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_plane2_offset_first_element_in_bytes The offset of the first element in the third plane
- * @param[in] dst_plane0_ptr Pointer to the first plane of the destination image. Supported Format: U8
- * @param[in] dst_plane0_stride_x Stride of the first plane of the destination image in X dimension (in bytes)
- * @param[in] dst_plane0_step_x dst_plane0_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_plane0_stride_y Stride of the first plane of the destination image in Y dimension (in bytes)
- * @param[in] dst_plane0_step_y dst_plane0_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_plane0_offset_first_element_in_bytes The offset of the first element in the first plane of the destination image
- * @param[in] dst_plane1_ptr Pointer to the second plane of the destination image. Supported Format: U8
- * @param[in] dst_plane1_stride_x Stride of the second plane of the destination image in X dimension (in bytes)
- * @param[in] dst_plane1_step_x dst_plane1_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_plane1_stride_y Stride of the second plane of the destination image in Y dimension (in bytes)
- * @param[in] dst_plane1_step_y dst_plane1_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_plane1_offset_first_element_in_bytes The offset of the first element in the second plane of the destination image
- * @param[in] dst_plane2_ptr Pointer to the third plane of the destination image. Supported Format: U8
- * @param[in] dst_plane2_stride_x Stride of the third plane of the destination image in X dimension (in bytes)
- * @param[in] dst_plane2_step_x dst_plane2_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_plane2_stride_y Stride of the third plane of the destination image in Y dimension (in bytes)
- * @param[in] dst_plane2_step_y dst_plane2_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_plane2_offset_first_element_in_bytes The offset of the first element in the third plane of the destination image
- * @param[in] height Sub-sampled height
- */
-__kernel void copy_planes_3p(
- IMAGE_DECLARATION(src_plane0),
- IMAGE_DECLARATION(src_plane1),
- IMAGE_DECLARATION(src_plane2),
- IMAGE_DECLARATION(dst_plane0),
- IMAGE_DECLARATION(dst_plane1),
- IMAGE_DECLARATION(dst_plane2),
- uint height)
-{
- // Get pixels pointer
- Image src_plane0 = CONVERT_TO_IMAGE_STRUCT(src_plane0);
- Image src_plane1 = CONVERT_TO_IMAGE_STRUCT(src_plane1);
- Image src_plane2 = CONVERT_TO_IMAGE_STRUCT(src_plane2);
- Image dst_plane0 = CONVERT_TO_IMAGE_STRUCT(dst_plane0);
- Image dst_plane1 = CONVERT_TO_IMAGE_STRUCT(dst_plane1);
- Image dst_plane2 = CONVERT_TO_IMAGE_STRUCT(dst_plane2);
-
- // Copy plane data
- vstore16(vload16(0, src_plane0.ptr), 0, dst_plane0.ptr);
-#ifdef YUV444
- vstore16(vload16(0, src_plane1.ptr), 0, dst_plane1.ptr);
- vstore16(vload16(0, src_plane2.ptr), 0, dst_plane2.ptr);
-#elif defined(IYUV)
- vstore16(vload16(0, offset(&src_plane0, 0, height)), 0, (__global uchar *)offset(&dst_plane0, 0, height));
- vstore8(vload8(0, src_plane1.ptr), 0, dst_plane1.ptr);
- vstore8(vload8(0, src_plane2.ptr), 0, dst_plane2.ptr);
-#endif /* YUV444 or IYUV */
-}
diff --git a/src/core/CL/cl_kernels/channel_extract.cl b/src/core/CL/cl_kernels/channel_extract.cl
deleted file mode 100644
index b64f24814e..0000000000
--- a/src/core/CL/cl_kernels/channel_extract.cl
+++ /dev/null
@@ -1,272 +0,0 @@
-/*
- * Copyright (c) 2016-2018 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "helpers.h"
-
-/** Extracts one channel from an interleaved RGB888 image; each work-item reads 24 source bytes (8 pixels) and writes 8 destination bytes.
- *
- * @note Select the channel at build time with one of -DCHANNEL_R, -DCHANNEL_G or -DCHANNEL_B; if none is defined the kernel stores nothing.
- *
- * @param[in] src_ptr Pointer to the source image. Supported Format: RGB
- * @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] dst_ptr Pointer to the destination image. Supported Format: U8
- * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void channel_extract_RGB888(
- IMAGE_DECLARATION(src),
- IMAGE_DECLARATION(dst))
-{
- // Wrap the source and destination kernel arguments in Image structs
- Image src = CONVERT_TO_IMAGE_STRUCT(src);
- Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
-
- uchar16 data = vload16(0, src.ptr);
- uchar8 data2 = vload8(0, src.ptr + 16);
-
-#ifdef CHANNEL_R
- vstore4(data.s0369, 0, dst.ptr);
- vstore4((uchar4)(data.sCF, data2.s25), 0, dst.ptr + 4);
-#elif defined(CHANNEL_G)
- vstore4(data.s147A, 0, dst.ptr);
- vstore4((uchar4)(data.sD, data2.s036), 0, dst.ptr + 4);
-#elif defined(CHANNEL_B)
- vstore4(data.s258B, 0, dst.ptr);
- vstore4((uchar4)(data.sE, data2.s147), 0, dst.ptr + 4);
-#endif /* CHANNEL_R or CHANNEL_G or CHANNEL_B */
-}
-
-/** Extracts one channel from an interleaved RGBA8888 image; each work-item reads 32 source bytes (8 pixels) and writes 8 destination bytes.
- *
- * @note Select the channel at build time with one of -DCHANNEL_R, -DCHANNEL_G, -DCHANNEL_B or -DCHANNEL_A; if none is defined the kernel stores nothing.
- *
- * @param[in] src_ptr Pointer to the source image. Supported Format: RGBA
- * @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] dst_ptr Pointer to the destination image. Supported Format: U8
- * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void channel_extract_RGBA8888(
- IMAGE_DECLARATION(src),
- IMAGE_DECLARATION(dst))
-{
- // Wrap the source and destination kernel arguments in Image structs
- Image src = CONVERT_TO_IMAGE_STRUCT(src);
- Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
-
- uchar16 data = vload16(0, src.ptr);
- uchar16 data2 = vload16(0, src.ptr + 16);
-
-#ifdef CHANNEL_R
- vstore8((uchar8)(data.s048C, data2.s048C), 0, dst.ptr);
-#elif defined(CHANNEL_G)
- vstore8((uchar8)(data.s159D, data2.s159D), 0, dst.ptr);
-#elif defined(CHANNEL_B)
- vstore8((uchar8)(data.s26AE, data2.s26AE), 0, dst.ptr);
-#elif defined(CHANNEL_A)
- vstore8((uchar8)(data.s37BF, data2.s37BF), 0, dst.ptr);
-#endif /* CHANNEL_R or CHANNEL_G or CHANNEL_B or CHANNEL_A */
-}
-
-/** Extracts one channel from a packed YUYV422 image; each work-item reads 16 source bytes.
- *
- * @note Build with -DCHANNEL_Y to store 8 bytes taken from the even source offsets, or -DCHANNEL_U / -DCHANNEL_V to store 4 bytes from offsets 1,5,9,13 / 3,7,11,15; if none is defined the kernel stores nothing.
- *
- * @param[in] src_ptr Pointer to the source image. Supported Format: YUYV
- * @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] dst_ptr Pointer to the destination image. Supported Format: U8
- * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void channel_extract_YUYV422(
- IMAGE_DECLARATION(src),
- IMAGE_DECLARATION(dst))
-{
- // Wrap the source and destination kernel arguments in Image structs
- Image src = CONVERT_TO_IMAGE_STRUCT(src);
- Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
-
- uchar16 data = vload16(0, src.ptr);
-
-#ifdef CHANNEL_Y
- vstore8(data.s02468ACE, 0, dst.ptr);
-#elif defined(CHANNEL_U)
- vstore4(data.s159D, 0, dst.ptr);
-#elif defined(CHANNEL_V)
- vstore4(data.s37BF, 0, dst.ptr);
-#endif /* CHANNEL_Y or CHANNEL_U or CHANNEL_V */
-}
-
-/** Extracts one channel from a packed UYVY422 image; each work-item reads 16 source bytes.
- *
- * @note Build with -DCHANNEL_Y to store 8 bytes taken from the odd source offsets, or -DCHANNEL_U / -DCHANNEL_V to store 4 bytes from offsets 0,4,8,12 / 2,6,10,14; if none is defined the kernel stores nothing.
- *
- * @param[in] src_ptr Pointer to the source image. Supported Format: UYVY
- * @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] dst_ptr Pointer to the destination image. Supported Format: U8
- * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void channel_extract_UYVY422(
- IMAGE_DECLARATION(src),
- IMAGE_DECLARATION(dst))
-{
- // Wrap the source and destination kernel arguments in Image structs
- Image src = CONVERT_TO_IMAGE_STRUCT(src);
- Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
-
- uchar16 data = vload16(0, src.ptr);
-
-#ifdef CHANNEL_Y
- vstore8(data.s13579BDF, 0, dst.ptr);
-#elif defined(CHANNEL_U)
- vstore4(data.s048C, 0, dst.ptr);
-#elif defined(CHANNEL_V)
- vstore4(data.s26AE, 0, dst.ptr);
-#endif /* CHANNEL_Y or CHANNEL_U or CHANNEL_V */
-}
-
-/** Extracts the U or V channel from the interleaved UV data of an NV12 image; each work-item reads 16 source bytes and writes 8 destination bytes.
- *
- * @note Build with -DCHANNEL_U to store the even source bytes or -DCHANNEL_V to store the odd source bytes; if neither is defined the kernel stores nothing.
- * @warning Only channels UV can be extracted using this kernel.
- *
- * @param[in] src_ptr Pointer to the source image. Supported Format: NV12 (UV88)
- * @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] dst_ptr Pointer to the destination image. Supported Format: U8
- * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void channel_extract_NV12(
- IMAGE_DECLARATION(src),
- IMAGE_DECLARATION(dst))
-{
- // Wrap the source and destination kernel arguments in Image structs
- Image src = CONVERT_TO_IMAGE_STRUCT(src);
- Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
-
- uchar16 data = vload16(0, src.ptr);
-
-#ifdef CHANNEL_U
- vstore8(data.s02468ACE, 0, dst.ptr);
-#elif defined(CHANNEL_V)
- vstore8(data.s13579BDF, 0, dst.ptr);
-#endif /* CHANNEL_U or CHANNEL_V */
-}
-
-/** Extracts the U or V channel from the interleaved chroma data of an NV21 image; each work-item reads 16 source bytes and writes 8 destination bytes.
- *
- * @note Build with -DCHANNEL_U to store the odd source bytes or -DCHANNEL_V to store the even source bytes (the reverse of the NV12 kernel); if neither is defined the kernel stores nothing.
- * @warning Only channels UV can be extracted using this kernel.
- *
- * @param[in] src_ptr Pointer to the source image. Supported Format: NV21 (UV88)
- * @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] dst_ptr Pointer to the destination image. Supported Format: U8
- * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void channel_extract_NV21(
- IMAGE_DECLARATION(src),
- IMAGE_DECLARATION(dst))
-{
- // Wrap the source and destination kernel arguments in Image structs
- Image src = CONVERT_TO_IMAGE_STRUCT(src);
- Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
-
- uchar16 data = vload16(0, src.ptr);
-
-#ifdef CHANNEL_U
- vstore8(data.s13579BDF, 0, dst.ptr);
-#elif defined(CHANNEL_V)
- vstore8(data.s02468ACE, 0, dst.ptr);
-#endif /* CHANNEL_U or CHANNEL_V */
-}
-
-/** Copies one plane of a multi-planar image to the destination; each work-item copies 8 bytes.
- *
- * @param[in] src_ptr Pointer to the source image. Supported Format: U8
- * @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] dst_ptr Pointer to the destination image. Supported Format: U8
- * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void copy_plane(
- IMAGE_DECLARATION(src),
- IMAGE_DECLARATION(dst))
-{
- // Wrap the source and destination kernel arguments in Image structs
- Image src = CONVERT_TO_IMAGE_STRUCT(src);
- Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
-
- // Copy 8 bytes of plane data unchanged
- vstore8(vload8(0, src.ptr), 0, dst.ptr);
-}
diff --git a/src/core/CL/cl_kernels/color_convert.cl b/src/core/CL/cl_kernels/color_convert.cl
deleted file mode 100644
index cbebc88668..0000000000
--- a/src/core/CL/cl_kernels/color_convert.cl
+++ /dev/null
@@ -1,1911 +0,0 @@
-/*
- * Copyright (c) 2016-2018 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "helpers.h"
-
-/** Convert an RGB888 image to RGBA8888, writing 255 into every fourth (alpha) byte
- *
- * Global Workgroup Size [ DIV_CEIL(width, 16), height ]
- * No offset.
- *
- * @param[in] input_ptr Pointer to the source image. Supported Format: RGB888
- * @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] output_ptr Pointer to the destination image. Supported Format: RGBA8888
- * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void RGB888_to_RGBA8888_bt709(
- IMAGE_DECLARATION(input),
- IMAGE_DECLARATION(output))
-{
- Image in = CONVERT_TO_IMAGE_STRUCT(input);
- Image out = CONVERT_TO_IMAGE_STRUCT(output);
-
- // Each work-item handles 16 pixels: 48 bytes of RGB in, 64 bytes of RGBA out
- uchar16 rgb_0 = vload16(0, in.ptr);
- uchar16 rgb_1 = vload16(0, in.ptr + 16);
- uchar16 rgb_2 = vload16(0, in.ptr + 32);
-
- uchar16 rgba_0 = (uchar16)(rgb_0.s012, 255, rgb_0.s345, 255, rgb_0.s678, 255, rgb_0.s9ab, 255);
- uchar16 rgba_1 = (uchar16)(rgb_0.scde, 255, rgb_0.sf, rgb_1.s01, 255, rgb_1.s234, 255, rgb_1.s567, 255);
- uchar16 rgba_2 = (uchar16)(rgb_1.s89a, 255, rgb_1.sbcd, 255, rgb_1.sef, rgb_2.s0, 255, rgb_2.s123, 255);
- uchar16 rgba_3 = (uchar16)(rgb_2.s456, 255, rgb_2.s789, 255, rgb_2.sabc, 255, rgb_2.sdef, 255);
-
- vstore16(rgba_0, 0, out.ptr);
- vstore16(rgba_1, 0, out.ptr + 16);
- vstore16(rgba_2, 0, out.ptr + 32);
- vstore16(rgba_3, 0, out.ptr + 48);
-}
-
-/** Convert an RGB888 image to U8 greyscale as 0.2126 * R + 0.7152 * G + 0.0722 * B
- *
- * Global Workgroup Size [ DIV_CEIL(width, 16), height ]
- * No offset.
- *
- * @param[in] input_ptr Pointer to the source image. Supported Format: RGB888
- * @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] output_ptr Pointer to the destination image. Supported Format: U8
- * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void RGB888_to_U8_bt709(
- IMAGE_DECLARATION(input),
- IMAGE_DECLARATION(output))
-{
- Image in = CONVERT_TO_IMAGE_STRUCT(input);
- Image out = CONVERT_TO_IMAGE_STRUCT(output);
-
- // Each work-item handles 16 pixels: 48 bytes of RGB in, 16 greyscale bytes out
- const uchar16 rgb_0 = vload16(0, in.ptr);
- const uchar16 rgb_1 = vload16(0, in.ptr + 16);
- const uchar16 rgb_2 = vload16(0, in.ptr + 32);
-
- // De-interleave the 16 RGB triplets into 16 R, 16 G and 16 B values
- const uchar16 rgb_r = (uchar16)(rgb_0.s0369, rgb_0.scf, rgb_1.s258b, rgb_1.se, rgb_2.s147a, rgb_2.sd);
- const uchar16 rgb_g = (uchar16)(rgb_0.s147a, rgb_0.sd, rgb_1.s0369, rgb_1.scf, rgb_2.s258b, rgb_2.se);
- const uchar16 rgb_b = (uchar16)(rgb_0.s258b, rgb_0.se, rgb_1.s147a, rgb_1.sd, rgb_2.s0369, rgb_2.scf);
-
- const float16 rgb2u8_red_coef_bt709 = 0.2126f;
- const float16 rgb2u8_green_coef_bt709 = 0.7152f;
- const float16 rgb2u8_blue_coef_bt709 = 0.0722f;
-
- // Weighted sum of the three channels gives 16 greyscale values in float
- const float16 greyscale_f_0 = rgb2u8_red_coef_bt709 * convert_float16(rgb_r) + rgb2u8_green_coef_bt709 * convert_float16(rgb_g) + rgb2u8_blue_coef_bt709 * convert_float16(rgb_b);
-
- // Convert to 16 greyscale uchar values, saturating and rounding toward zero
- const uchar16 greyscale_u8_0 = convert_uchar16_sat_rtz(greyscale_f_0);
-
- vstore16(greyscale_u8_0, 0, out.ptr);
-}
-
-/** Convert an RGBA8888 image to RGB888 by dropping every fourth (alpha) byte
- *
- * Global Workgroup Size [ DIV_CEIL(width, 16), height ]
- * No offset.
- *
- * @param[in] input_ptr Pointer to the source image. Supported Format: RGBA8888
- * @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] output_ptr Pointer to the destination image. Supported Format: RGB888
- * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void RGBA8888_to_RGB888_bt709(
- IMAGE_DECLARATION(input),
- IMAGE_DECLARATION(output))
-{
- Image in = CONVERT_TO_IMAGE_STRUCT(input);
- Image out = CONVERT_TO_IMAGE_STRUCT(output);
- // Each work-item handles 16 pixels: 64 bytes of RGBA in, 48 bytes of RGB out
- uchar16 rgba_0 = vload16(0, in.ptr);
- uchar16 rgba_1 = vload16(0, in.ptr + 16);
- uchar16 rgba_2 = vload16(0, in.ptr + 32);
- uchar16 rgba_3 = vload16(0, in.ptr + 48);
-
- uchar16 rgb_0 = (uchar16)(rgba_0.s01245689, rgba_0.sacde, rgba_1.s0124);
- uchar16 rgb_1 = (uchar16)(rgba_1.s5689acde, rgba_2.s01245689);
- uchar16 rgb_2 = (uchar16)(rgba_2.sacde, rgba_3.s01245689, rgba_3.sacde);
-
- vstore16(rgb_0, 0, out.ptr);
- vstore16(rgb_1, 0, out.ptr + 16);
- vstore16(rgb_2, 0, out.ptr + 32);
-}
-
-/** Convert a UYVY422 image to RGB888 using BT709 color space
- *
- * Global Workgroup Size [ DIV_CEIL(width, 8), height ]
- * No offset.
- *
- * @param[in] input_ptr Pointer to the source image. Supported Format: UYVY422
- * @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] output_ptr Pointer to the destination image. Supported Format: RGB888
- * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void UYVY422_to_RGB888_bt709(
- IMAGE_DECLARATION(input),
- IMAGE_DECLARATION(output))
-{
- Image in = CONVERT_TO_IMAGE_STRUCT(input);
- Image out = CONVERT_TO_IMAGE_STRUCT(output);
-
- // Each work-item handles 8 pixels: 16 bytes of UYVY in, 24 bytes of RGB out; each chroma sample is shared by two adjacent pixels
- uchar16 uyvy = vload16(0, in.ptr);
-
- uchar8 luma = (uchar8)(uyvy.s1, uyvy.s3, uyvy.s5, uyvy.s7, uyvy.s9, uyvy.sb, uyvy.sd, uyvy.sf);
- char8 cb = (char8)(uyvy.s0, uyvy.s0, uyvy.s4, uyvy.s4, uyvy.s8, uyvy.s8, uyvy.sc, uyvy.sc) - (char8)(128);
- char8 cr = (char8)(uyvy.s2, uyvy.s2, uyvy.s6, uyvy.s6, uyvy.sa, uyvy.sa, uyvy.se, uyvy.se) - (char8)(128);
-
- float8 red_coef_bt709 = (float8)(1.5748f);
- float8 green_coef_bt709 = (float8)(-0.1873f);
- float8 green_coef2_bt709 = (float8)(-0.4681f);
- float8 blue_coef_bt709 = (float8)(1.8556f);
- float8 lumav = convert_float8(luma);
-
- float8 f_r = red_coef_bt709 * convert_float8(cr);
- float8 f_g = green_coef_bt709 * convert_float8(cb) + green_coef2_bt709 * convert_float8(cr);
- float8 f_b = blue_coef_bt709 * convert_float8(cb);
-
- f_r += lumav;
- f_g += lumav;
- f_b += lumav;
-
- uchar8 r_0 = convert_uchar8_sat_rtz(f_r);
- uchar8 g_0 = convert_uchar8_sat_rtz(f_g);
- uchar8 b_0 = convert_uchar8_sat_rtz(f_b);
-
- uchar16 rgb_0 = (uchar16)(r_0.s0, g_0.s0, b_0.s0, r_0.s1, g_0.s1, b_0.s1, r_0.s2, g_0.s2, b_0.s2,
- r_0.s3, g_0.s3, b_0.s3, r_0.s4, g_0.s4, b_0.s4, r_0.s5);
- uchar8 rgb_1 = (uchar8)(g_0.s5, b_0.s5, r_0.s6, g_0.s6, b_0.s6, r_0.s7, g_0.s7, b_0.s7);
-
- vstore16(rgb_0, 0, out.ptr);
- vstore8(rgb_1, 0, out.ptr + 16);
-}
-
-/** Convert a UYVY422 image to RGBA8888 (alpha set to 255) using BT709 color space
- *
- * Global Workgroup Size [ DIV_CEIL(width, 8), height ]
- * No offset.
- *
- * @param[in] input_ptr Pointer to the source image. Supported Format: UYVY422
- * @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] output_ptr Pointer to the destination image. Supported Format: RGBA8888
- * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void UYVY422_to_RGBA8888_bt709(
- IMAGE_DECLARATION(input),
- IMAGE_DECLARATION(output))
-{
- Image in = CONVERT_TO_IMAGE_STRUCT(input);
- Image out = CONVERT_TO_IMAGE_STRUCT(output);
-
- // Each work-item handles 8 pixels: 16 bytes of UYVY in, 32 bytes of RGBA out; each chroma sample is shared by two adjacent pixels
- uchar16 uyvy = vload16(0, in.ptr);
-
- uchar8 luma = (uchar8)(uyvy.s1, uyvy.s3, uyvy.s5, uyvy.s7, uyvy.s9, uyvy.sb, uyvy.sd, uyvy.sf);
- char8 cb = (char8)(uyvy.s0, uyvy.s0, uyvy.s4, uyvy.s4, uyvy.s8, uyvy.s8, uyvy.sc, uyvy.sc) - (char8)(128);
- char8 cr = (char8)(uyvy.s2, uyvy.s2, uyvy.s6, uyvy.s6, uyvy.sa, uyvy.sa, uyvy.se, uyvy.se) - (char8)(128);
-
- float8 red_coef_bt709 = (float8)(1.5748f);
- float8 green_coef_bt709 = (float8)(-0.1873f);
- float8 green_coef2_bt709 = (float8)(-0.4681f);
- float8 blue_coef_bt709 = (float8)(1.8556f);
- float8 lumav = convert_float8(luma);
-
- float8 f_r = red_coef_bt709 * convert_float8(cr);
- float8 f_g = green_coef_bt709 * convert_float8(cb) + green_coef2_bt709 * convert_float8(cr);
- float8 f_b = blue_coef_bt709 * convert_float8(cb);
-
- f_r += lumav;
- f_g += lumav;
- f_b += lumav;
-
- uchar8 r_0 = convert_uchar8_sat_rtz(f_r);
- uchar8 g_0 = convert_uchar8_sat_rtz(f_g);
- uchar8 b_0 = convert_uchar8_sat_rtz(f_b);
-
- uchar16 rgba_0 = (uchar16)(r_0.s0, g_0.s0, b_0.s0, 255, r_0.s1, g_0.s1, b_0.s1, 255,
- r_0.s2, g_0.s2, b_0.s2, 255, r_0.s3, g_0.s3, b_0.s3, 255);
- uchar16 rgba_1 = (uchar16)(r_0.s4, g_0.s4, b_0.s4, 255, r_0.s5, g_0.s5, b_0.s5, 255,
- r_0.s6, g_0.s6, b_0.s6, 255, r_0.s7, g_0.s7, b_0.s7, 255);
-
- vstore16(rgba_0, 0, out.ptr);
- vstore16(rgba_1, 0, out.ptr + 16);
-}
-
-/** Convert a YUYV422 image to RGB888 using BT709 color space
- *
- * Global Workgroup Size [ DIV_CEIL(width, 8), height ]
- * No offset.
- *
- * @param[in] input_ptr Pointer to the source image. Supported Format: YUYV422
- * @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] output_ptr Pointer to the destination image. Supported Format: RGB888
- * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void YUYV422_to_RGB888_bt709(
- IMAGE_DECLARATION(input),
- IMAGE_DECLARATION(output))
-{
- Image in = CONVERT_TO_IMAGE_STRUCT(input);
- Image out = CONVERT_TO_IMAGE_STRUCT(output);
-
- // Each work-item handles 8 pixels: 16 bytes in, 24 bytes of RGB out. The local is named uyvy but holds YUYV-ordered bytes (luma at even offsets)
- uchar16 uyvy = vload16(0, in.ptr);
-
- uchar8 luma = (uchar8)(uyvy.s0, uyvy.s2, uyvy.s4, uyvy.s6, uyvy.s8, uyvy.sa, uyvy.sc, uyvy.se);
- char8 cb = (char8)(uyvy.s1, uyvy.s1, uyvy.s5, uyvy.s5, uyvy.s9, uyvy.s9, uyvy.sd, uyvy.sd) - (char8)(128);
- char8 cr = (char8)(uyvy.s3, uyvy.s3, uyvy.s7, uyvy.s7, uyvy.sb, uyvy.sb, uyvy.sf, uyvy.sf) - (char8)(128);
-
- float8 red_coef_bt709 = (float8)(1.5748f);
- float8 green_coef_bt709 = (float8)(-0.1873f);
- float8 green_coef2_bt709 = (float8)(-0.4681f);
- float8 blue_coef_bt709 = (float8)(1.8556f);
- float8 lumav = convert_float8(luma);
-
- float8 f_r = red_coef_bt709 * convert_float8(cr);
- float8 f_g = green_coef_bt709 * convert_float8(cb) + green_coef2_bt709 * convert_float8(cr);
- float8 f_b = blue_coef_bt709 * convert_float8(cb);
-
- f_r += lumav;
- f_g += lumav;
- f_b += lumav;
-
- uchar8 r_0 = convert_uchar8_sat_rtz(f_r);
- uchar8 g_0 = convert_uchar8_sat_rtz(f_g);
- uchar8 b_0 = convert_uchar8_sat_rtz(f_b);
-
- uchar16 rgb_0 = (uchar16)(r_0.s0, g_0.s0, b_0.s0, r_0.s1, g_0.s1, b_0.s1, r_0.s2, g_0.s2, b_0.s2,
- r_0.s3, g_0.s3, b_0.s3, r_0.s4, g_0.s4, b_0.s4, r_0.s5);
- uchar8 rgb_1 = (uchar8)(g_0.s5, b_0.s5, r_0.s6, g_0.s6, b_0.s6, r_0.s7, g_0.s7, b_0.s7);
-
- vstore16(rgb_0, 0, out.ptr);
- vstore8(rgb_1, 0, out.ptr + 16);
-}
-
-/** Convert a YUYV422 image to RGBA8888 (alpha set to 255) using BT709 color space
- *
- * Global Workgroup Size [ DIV_CEIL(width, 8), height ]
- * No offset.
- *
- * @param[in] input_ptr Pointer to the source image. Supported Format: YUYV422
- * @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] output_ptr Pointer to the destination image. Supported Format: RGBA8888
- * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void YUYV422_to_RGBA8888_bt709(
- IMAGE_DECLARATION(input),
- IMAGE_DECLARATION(output))
-{
- Image in = CONVERT_TO_IMAGE_STRUCT(input);
- Image out = CONVERT_TO_IMAGE_STRUCT(output);
-
- // Each work-item handles 8 pixels: 16 bytes in, 32 bytes of RGBA out. The local is named uyvy but holds YUYV-ordered bytes (luma at even offsets)
- uchar16 uyvy = vload16(0, in.ptr);
-
- uchar8 luma = (uchar8)(uyvy.s0, uyvy.s2, uyvy.s4, uyvy.s6, uyvy.s8, uyvy.sa, uyvy.sc, uyvy.se);
- char8 cb = (char8)(uyvy.s1, uyvy.s1, uyvy.s5, uyvy.s5, uyvy.s9, uyvy.s9, uyvy.sd, uyvy.sd) - (char8)(128);
- char8 cr = (char8)(uyvy.s3, uyvy.s3, uyvy.s7, uyvy.s7, uyvy.sb, uyvy.sb, uyvy.sf, uyvy.sf) - (char8)(128);
-
- float8 red_coef_bt709 = (float8)(1.5748f);
- float8 green_coef_bt709 = (float8)(-0.1873f);
- float8 green_coef2_bt709 = (float8)(-0.4681f);
- float8 blue_coef_bt709 = (float8)(1.8556f);
- float8 lumav = convert_float8(luma);
-
- float8 f_r = red_coef_bt709 * convert_float8(cr);
- float8 f_g = green_coef_bt709 * convert_float8(cb) + green_coef2_bt709 * convert_float8(cr);
- float8 f_b = blue_coef_bt709 * convert_float8(cb);
-
- f_r += lumav;
- f_g += lumav;
- f_b += lumav;
-
- uchar8 r_0 = convert_uchar8_sat_rtz(f_r);
- uchar8 g_0 = convert_uchar8_sat_rtz(f_g);
- uchar8 b_0 = convert_uchar8_sat_rtz(f_b);
-
- uchar16 rgba_0 = (uchar16)(r_0.s0, g_0.s0, b_0.s0, 255, r_0.s1, g_0.s1, b_0.s1, 255,
- r_0.s2, g_0.s2, b_0.s2, 255, r_0.s3, g_0.s3, b_0.s3, 255);
- uchar16 rgba_1 = (uchar16)(r_0.s4, g_0.s4, b_0.s4, 255, r_0.s5, g_0.s5, b_0.s5, 255,
- r_0.s6, g_0.s6, b_0.s6, 255, r_0.s7, g_0.s7, b_0.s7, 255);
-
- vstore16(rgba_0, 0, out.ptr);
- vstore16(rgba_1, 0, out.ptr + 16);
-}
-
-/** Convert a RGB image to NV12 using BT709 color space
- *
- * Each work item converts a 2x2 pixel block: it loads 8 bytes from each of two consecutive input rows
- * (only the first 6 bytes, i.e. two RGB pixels, are used), stores two luma bytes on each of two luma rows
- * and stores one interleaved Cb/Cr pair. Y, Cb and Cr are truncated and clamped to [0, 255] per pixel;
- * the four clamped chroma values of the block are then averaged with an integer division by 4.
- *
- * Global Workgroup Size [ DIV_CEIL(width, 4), height ]
- * NOTE(review): the kernel body consumes 2 pixels per row and 2 rows per work item, which does not obviously
- * match the size stated above - confirm against the host-side window configuration.
- * No offset.
- *
- * @param[in] input_ptr Pointer to the source image. Supported Format: U8
- * @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] luma_ptr Pointer to the destination luma channel. Supported Format: U8
- * @param[in] luma_stride_x Stride of the destination luma channel in X dimension (in bytes)
- * @param[in] luma_step_x luma_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] luma_stride_y Stride of the destination image luma channel in Y dimension (in bytes)
- * @param[in] luma_step_y luma_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] luma_offset_first_element_in_bytes The offset of the first element in the destination image luma channel
- * @param[out] uv_ptr Pointer to the destination uv channel. Supported Format: U8
- * @param[in] uv_stride_x Stride of the destination uv channel in X dimension (in bytes)
- * @param[in] uv_step_x uv_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] uv_stride_y Stride of the destination image uv channel in Y dimension (in bytes)
- * @param[in] uv_step_y uv_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] uv_offset_first_element_in_bytes The offset of the first element in the destination image uv channel
- *
- */
-__kernel void RGB888_to_NV12_bt709(
- IMAGE_DECLARATION(input),
- IMAGE_DECLARATION(luma),
- IMAGE_DECLARATION(uv))
-{
- Image in = CONVERT_TO_IMAGE_STRUCT(input);
- Image out_y = CONVERT_TO_IMAGE_STRUCT(luma);
- Image out_uv = CONVERT_TO_IMAGE_STRUCT(uv);
-
- // Handle 4 pixels per work item: two lines, 2 pixels on each line
- // Read 2 pixels of the first line (8 bytes are loaded, the last 2 are ignored)
- uchar8 rgb_0 = vload8(0, in.ptr);
- uchar2 r_0 = (uchar2)(rgb_0.s0, rgb_0.s3);
- uchar2 g_0 = (uchar2)(rgb_0.s1, rgb_0.s4);
- uchar2 b_0 = (uchar2)(rgb_0.s2, rgb_0.s5);
-
- float2 f_y = (float2)(0.0000f) + (float2)(0.2126f) * convert_float2(r_0) + (float2)(0.7152f) * convert_float2(g_0) + (float2)(0.0722f) * convert_float2(b_0);
- float2 f_u = (float2)(0.0000f) - (float2)(0.1146f) * convert_float2(r_0) - (float2)(0.3854f) * convert_float2(g_0) + (float2)(0.5000f) * convert_float2(b_0);
- float2 f_v = (float2)(0.0000f) + (float2)(0.5000f) * convert_float2(r_0) - (float2)(0.4542f) * convert_float2(g_0) - (float2)(0.0458f) * convert_float2(b_0);
-
- short2 i_y = convert_short2_rtz(f_y);
- short2 i_u = convert_short2_rtz(f_u) + (short2)(128);
- short2 i_v = convert_short2_rtz(f_v) + (short2)(128);
-
- uchar2 luma_0 = convert_uchar2(max((short2)(0), min(i_y, (short2)(255))));
- vstore2(luma_0, 0, out_y.ptr);
-
- uchar2 cb_0 = convert_uchar2(max((short2)(0), min(i_u, (short2)(255))));
- uchar2 cr_0 = convert_uchar2(max((short2)(0), min(i_v, (short2)(255))));
-
- // Read 2 pixels of the second line (one input row stride further)
- uchar8 rgb_1 = vload8(0, in.ptr + input_stride_y);
- uchar2 r_1 = (uchar2)(rgb_1.s0, rgb_1.s3);
- uchar2 g_1 = (uchar2)(rgb_1.s1, rgb_1.s4);
- uchar2 b_1 = (uchar2)(rgb_1.s2, rgb_1.s5);
-
- f_y = (float2)(0.0000f) + (float2)(0.2126f) * convert_float2(r_1) + (float2)(0.7152f) * convert_float2(g_1) + (float2)(0.0722f) * convert_float2(b_1);
- f_u = (float2)(0.0000f) - (float2)(0.1146f) * convert_float2(r_1) - (float2)(0.3854f) * convert_float2(g_1) + (float2)(0.5000f) * convert_float2(b_1);
- f_v = (float2)(0.0000f) + (float2)(0.5000f) * convert_float2(r_1) - (float2)(0.4542f) * convert_float2(g_1) - (float2)(0.0458f) * convert_float2(b_1);
-
- i_y = convert_short2_rtz(f_y);
- i_u = convert_short2_rtz(f_u) + (short2)(128);
- i_v = convert_short2_rtz(f_v) + (short2)(128);
-
- uchar2 luma_1 = convert_uchar2(max((short2)(0), min(i_y, (short2)(255))));
- vstore2(luma_1, 0, out_y.ptr + luma_stride_y);
-
- uchar2 cb_1 = convert_uchar2(max((short2)(0), min(i_u, (short2)(255))));
- uchar2 cr_1 = convert_uchar2(max((short2)(0), min(i_v, (short2)(255))));
- uchar2 cbcr = (uchar2)(((cb_0.s0 + cb_0.s1 + cb_1.s0 + cb_1.s1) / 4),
- ((cr_0.s0 + cr_0.s1 + cr_1.s0 + cr_1.s1) / 4));
-
- vstore2(cbcr, 0, out_uv.ptr);
-}
-
-/*
- R'= Y' + 0.0000*U + 1.5748*V
- G'= Y' - 0.1873*U - 0.4681*V
- B'= Y' + 1.8556*U + 0.0000*V
-*/
-
-/** Convert an NV12 image to RGB888 using BT709 color space
- *
- * Each work item converts a 4x2 pixel block: it loads 4 luma bytes from each of two consecutive luma rows
- * and 4 interleaved chroma bytes (two Cb/Cr pairs), then stores 12 RGB bytes on each of two output rows.
- * Each Cb/Cr pair is shared by two horizontally adjacent pixels on both rows; chroma is re-centred by
- * subtracting 128 and the float results are converted to 8 bit with saturation and round-toward-zero.
- *
- * Global Workgroup Size [ DIV_CEIL(width, 4), height ]
- * No offset.
- *
- * @param[in] luma_input_ptr Pointer to the source luma channel. Supported Format: U8
- * @param[in] luma_input_stride_x Stride of the luma image in X dimension (in bytes)
- * @param[in] luma_input_step_x luma_input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] luma_input_stride_y Stride of the source luma channel in Y dimension (in bytes)
- * @param[in] luma_input_step_y luma_input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] luma_input_offset_first_element_in_bytes The offset of the first element in the source luma channel
- * @param[in] uv_input_ptr Pointer to the source uv channel. Supported Format: U8
- * @param[in] uv_input_stride_x Stride of the source image uv channel in X dimension (in bytes)
- * @param[in] uv_input_step_x uv_input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] uv_input_stride_y Stride of the source image uv channel in Y dimension (in bytes)
- * @param[in] uv_input_step_y uv_input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] uv_input_offset_first_element_in_bytes The offset of the first element in the source uv channel
- * @param[out] rgb_output_ptr Pointer to the destination image. Supported Format: U8
- * @param[in] rgb_output_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] rgb_output_step_x rgb_output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] rgb_output_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] rgb_output_step_y rgb_output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] rgb_output_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void NV12_to_RGB888_bt709(
- IMAGE_DECLARATION(luma_input),
- IMAGE_DECLARATION(uv_input),
- IMAGE_DECLARATION(rgb_output))
-{
- Image in_luma = CONVERT_TO_IMAGE_STRUCT(luma_input);
- Image in_uv = CONVERT_TO_IMAGE_STRUCT(uv_input);
- Image out_rgb = CONVERT_TO_IMAGE_STRUCT(rgb_output);
-
- // Handle 8 pixels per work item: two lines, 4 pixels on each line, sharing one row of chroma samples
- uchar4 luma_0 = vload4(0, in_luma.ptr);
- uchar4 luma_1 = vload4(0, in_luma.ptr + luma_input_stride_y);
- uchar4 cbcr = vload4(0, in_uv.ptr);
- char4 cb = (char4)(cbcr.s0, cbcr.s0, cbcr.s2, cbcr.s2) - (char4)(128);
- char4 cr = (char4)(cbcr.s1, cbcr.s1, cbcr.s3, cbcr.s3) - (char4)(128);
-
- float4 temp0 = (float4)(0.0000f) + (float4)(0.0000f) * convert_float4(cb) + (float4)(1.5748f) * convert_float4(cr);
- float4 temp1 = (float4)(0.0000f) - (float4)(0.1873f) * convert_float4(cb) - (float4)(0.4681f) * convert_float4(cr);
- float4 temp2 = (float4)(0.0000f) + (float4)(1.8556f) * convert_float4(cb) + (float4)(0.0000f) * convert_float4(cr);
-
- float4 f_r = convert_float4(luma_0) + temp0;
- float4 f_g = convert_float4(luma_0) + temp1;
- float4 f_b = convert_float4(luma_0) + temp2;
-
- uchar4 r_0 = convert_uchar4_sat_rtz(f_r);
- uchar4 g_0 = convert_uchar4_sat_rtz(f_g);
- uchar4 b_0 = convert_uchar4_sat_rtz(f_b);
-
- uchar8 rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, r_0.s1, g_0.s1, b_0.s1, r_0.s2, g_0.s2);
- uchar4 rgb_1 = (uchar4)(b_0.s2, r_0.s3, g_0.s3, b_0.s3);
- vstore8(rgb_0, 0, out_rgb.ptr);
- vstore4(rgb_1, 0, out_rgb.ptr + 8);
-
- f_r = convert_float4(luma_1) + temp0;
- f_g = convert_float4(luma_1) + temp1;
- f_b = convert_float4(luma_1) + temp2;
-
- r_0 = convert_uchar4_sat_rtz(f_r);
- g_0 = convert_uchar4_sat_rtz(f_g);
- b_0 = convert_uchar4_sat_rtz(f_b);
-
- rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, r_0.s1, g_0.s1, b_0.s1, r_0.s2, g_0.s2);
- rgb_1 = (uchar4)(b_0.s2, r_0.s3, g_0.s3, b_0.s3);
- vstore8(rgb_0, 0, out_rgb.ptr + rgb_output_stride_y);
- vstore4(rgb_1, 0, out_rgb.ptr + rgb_output_stride_y + 8);
-}
-
-/** Convert a RGB image to YUV444 using BT709 color space
- *
- * Each work item converts 4 horizontally adjacent pixels: it loads 16 input bytes (only the first 12,
- * i.e. four RGB pixels, are used) and stores 4 bytes to each of the Y, U and V planes.
- * The float results are truncated toward zero, U and V are offset by 128, and all three are clamped to [0, 255].
- *
- * Global Workgroup Size [ DIV_CEIL(width, 4), height ]
- * No offset.
- *
- * @param[in] rgb_input_ptr Pointer to the source image. Supported Format: U8
- * @param[in] rgb_input_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] rgb_input_step_x rgb_input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] rgb_input_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] rgb_input_step_y rgb_input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] rgb_input_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] luma_output_ptr Pointer to the destination luma channel. Supported Format: U8
- * @param[in] luma_output_stride_x Stride of the destination luma channel in X dimension (in bytes)
- * @param[in] luma_output_step_x luma_output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] luma_output_stride_y Stride of the destination image luma channel in Y dimension (in bytes)
- * @param[in] luma_output_step_y luma_output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] luma_output_offset_first_element_in_bytes The offset of the first element in the destination luma channel
- * @param[out] u_output_ptr Pointer to the destination U channel. Supported Format: U8
- * @param[in] u_output_stride_x Stride of the destination U channel in X dimension (in bytes)
- * @param[in] u_output_step_x u_output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] u_output_stride_y Stride of the destination image U channel in Y dimension (in bytes)
- * @param[in] u_output_step_y u_output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] u_output_offset_first_element_in_bytes The offset of the first element in the destination U channel
- * @param[out] v_output_ptr Pointer to the destination V channel. Supported Format: U8
- * @param[in] v_output_stride_x Stride of the destination V channel in X dimension (in bytes)
- * @param[in] v_output_step_x v_output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] v_output_stride_y Stride of the destination image V channel in Y dimension (in bytes)
- * @param[in] v_output_step_y v_output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] v_output_offset_first_element_in_bytes The offset of the first element in the destination V channel
- *
- */
-__kernel void RGB888_to_YUV444_bt709(
- IMAGE_DECLARATION(rgb_input),
- IMAGE_DECLARATION(luma_output),
- IMAGE_DECLARATION(u_output),
- IMAGE_DECLARATION(v_output))
-{
- // Handle 4 pixels per work item
- Image in_rgb = CONVERT_TO_IMAGE_STRUCT(rgb_input);
- Image out_y = CONVERT_TO_IMAGE_STRUCT(luma_output);
- Image out_u = CONVERT_TO_IMAGE_STRUCT(u_output);
- Image out_v = CONVERT_TO_IMAGE_STRUCT(v_output);
-
- // Read 4 pixels (16 bytes are loaded, the last 4 are ignored)
- uchar16 rgb_0 = vload16(0, in_rgb.ptr);
- uchar4 r_0 = (uchar4)(rgb_0.s0, rgb_0.s3, rgb_0.s6, rgb_0.s9);
- uchar4 g_0 = (uchar4)(rgb_0.s1, rgb_0.s4, rgb_0.s7, rgb_0.sa);
- uchar4 b_0 = (uchar4)(rgb_0.s2, rgb_0.s5, rgb_0.s8, rgb_0.sb);
-
- float4 f_y = (float4)(0.0000f) + (float4)(0.2126f) * convert_float4(r_0) + (float4)(0.7152f) * convert_float4(g_0) + (float4)(0.0722f) * convert_float4(b_0);
- float4 f_u = (float4)(0.0000f) - (float4)(0.1146f) * convert_float4(r_0) - (float4)(0.3854f) * convert_float4(g_0) + (float4)(0.5000f) * convert_float4(b_0);
- float4 f_v = (float4)(0.0000f) + (float4)(0.5000f) * convert_float4(r_0) - (float4)(0.4542f) * convert_float4(g_0) - (float4)(0.0458f) * convert_float4(b_0);
-
- short4 i_y = convert_short4_rtz(f_y);
- short4 i_u = convert_short4_rtz(f_u) + (short4)(128);
- short4 i_v = convert_short4_rtz(f_v) + (short4)(128);
-
- uchar4 luma_0 = convert_uchar4(max((short4)(0), min(i_y, (short4)(255))));
- vstore4(luma_0, 0, out_y.ptr);
-
- uchar4 cb_0 = convert_uchar4(max((short4)(0), min(i_u, (short4)(255))));
- uchar4 cr_0 = convert_uchar4(max((short4)(0), min(i_v, (short4)(255))));
- vstore4(cb_0, 0, out_u.ptr);
- vstore4(cr_0, 0, out_v.ptr);
-}
-
-/** Convert a RGB image to IYUV using BT709 color space
- *
- * Each work item converts a 2x2 pixel block: it loads 8 bytes from each of two consecutive input rows
- * (only the first 6 bytes, i.e. two RGB pixels, are used), stores two luma bytes on each of two luma rows
- * and stores a single byte to each of the U and V planes. Y, Cb and Cr are truncated and clamped to
- * [0, 255] per pixel; the four clamped chroma values of the block are then averaged with an integer division by 4.
- *
- * Global Workgroup Size [ DIV_CEIL(width, 2), height ]
- * No offset.
- *
- * @param[in] rgb_input_ptr Pointer to the source image. Supported Format: U8
- * @param[in] rgb_input_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] rgb_input_step_x rgb_input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] rgb_input_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] rgb_input_step_y rgb_input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] rgb_input_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] luma_output_ptr Pointer to the destination luma channel. Supported Format: U8
- * @param[in] luma_output_stride_x Stride of the destination luma channel in X dimension (in bytes)
- * @param[in] luma_output_step_x luma_output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] luma_output_stride_y Stride of the destination image luma channel in Y dimension (in bytes)
- * @param[in] luma_output_step_y luma_output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] luma_output_offset_first_element_in_bytes The offset of the first element in the destination luma channel
- * @param[out] u_output_ptr Pointer to the destination U channel. Supported Format: U8
- * @param[in] u_output_stride_x Stride of the destination U channel in X dimension (in bytes)
- * @param[in] u_output_step_x u_output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] u_output_stride_y Stride of the destination image U channel in Y dimension (in bytes)
- * @param[in] u_output_step_y u_output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] u_output_offset_first_element_in_bytes The offset of the first element in the destination U channel
- * @param[out] v_output_ptr Pointer to the destination V channel. Supported Format: U8
- * @param[in] v_output_stride_x Stride of the destination V channel in X dimension (in bytes)
- * @param[in] v_output_step_x v_output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] v_output_stride_y Stride of the destination V channel in Y dimension (in bytes)
- * @param[in] v_output_step_y v_output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] v_output_offset_first_element_in_bytes The offset of the first element in the destination V channel
- *
- */
-__kernel void RGB888_to_IYUV_bt709(
- IMAGE_DECLARATION(rgb_input),
- IMAGE_DECLARATION(luma_output),
- IMAGE_DECLARATION(u_output),
- IMAGE_DECLARATION(v_output))
-{
- // Handle 4 pixels per work item: two lines, 2 pixels on each line
- Image in_rgb = CONVERT_TO_IMAGE_STRUCT(rgb_input);
- Image out_y = CONVERT_TO_IMAGE_STRUCT(luma_output);
- Image out_u = CONVERT_TO_IMAGE_STRUCT(u_output);
- Image out_v = CONVERT_TO_IMAGE_STRUCT(v_output);
-
- // Read 2 pixels of the first line (8 bytes are loaded, the last 2 are ignored)
- uchar8 rgb_0 = vload8(0, in_rgb.ptr);
- uchar2 r_0 = (uchar2)(rgb_0.s0, rgb_0.s3);
- uchar2 g_0 = (uchar2)(rgb_0.s1, rgb_0.s4);
- uchar2 b_0 = (uchar2)(rgb_0.s2, rgb_0.s5);
-
- float2 f_y = (float2)(0.0000f) + (float2)(0.2126f) * convert_float2(r_0) + (float2)(0.7152f) * convert_float2(g_0) + (float2)(0.0722f) * convert_float2(b_0);
- float2 f_u = (float2)(0.0000f) - (float2)(0.1146f) * convert_float2(r_0) - (float2)(0.3854f) * convert_float2(g_0) + (float2)(0.5000f) * convert_float2(b_0);
- float2 f_v = (float2)(0.0000f) + (float2)(0.5000f) * convert_float2(r_0) - (float2)(0.4542f) * convert_float2(g_0) - (float2)(0.0458f) * convert_float2(b_0);
-
- short2 i_y = convert_short2_rtz(f_y);
- short2 i_u = convert_short2_rtz(f_u) + (short2)(128);
- short2 i_v = convert_short2_rtz(f_v) + (short2)(128);
-
- uchar2 luma_0 = convert_uchar2(max((short2)(0), min(i_y, (short2)(255))));
- vstore2(luma_0, 0, out_y.ptr);
-
- uchar2 cb_0 = convert_uchar2(max((short2)(0), min(i_u, (short2)(255))));
- uchar2 cr_0 = convert_uchar2(max((short2)(0), min(i_v, (short2)(255))));
-
- // Read 2 pixels of the second line (one input row stride further)
- uchar8 rgb_1 = vload8(0, in_rgb.ptr + rgb_input_stride_y);
- uchar2 r_1 = (uchar2)(rgb_1.s0, rgb_1.s3);
- uchar2 g_1 = (uchar2)(rgb_1.s1, rgb_1.s4);
- uchar2 b_1 = (uchar2)(rgb_1.s2, rgb_1.s5);
-
- f_y = (float2)(0.0000f) + (float2)(0.2126f) * convert_float2(r_1) + (float2)(0.7152f) * convert_float2(g_1) + (float2)(0.0722f) * convert_float2(b_1);
- f_u = (float2)(0.0000f) - (float2)(0.1146f) * convert_float2(r_1) - (float2)(0.3854f) * convert_float2(g_1) + (float2)(0.5000f) * convert_float2(b_1);
- f_v = (float2)(0.0000f) + (float2)(0.5000f) * convert_float2(r_1) - (float2)(0.4542f) * convert_float2(g_1) - (float2)(0.0458f) * convert_float2(b_1);
-
- i_y = convert_short2_rtz(f_y);
- i_u = convert_short2_rtz(f_u) + (short2)(128);
- i_v = convert_short2_rtz(f_v) + (short2)(128);
-
- uchar2 luma_1 = convert_uchar2(max((short2)(0), min(i_y, (short2)(255))));
- vstore2(luma_1, 0, out_y.ptr + luma_output_stride_y);
-
- uchar2 cb_1 = convert_uchar2(max((short2)(0), min(i_u, (short2)(255))));
- uchar2 cr_1 = convert_uchar2(max((short2)(0), min(i_v, (short2)(255))));
- uchar2 cbcr = (uchar2)(((cb_0.s0 + cb_0.s1 + cb_1.s0 + cb_1.s1) / 4),
- ((cr_0.s0 + cr_0.s1 + cr_1.s0 + cr_1.s1) / 4));
- *out_u.ptr = cbcr.x;
- *out_v.ptr = cbcr.y;
-}
-
-/** Convert a RGBA image to YUV444 using BT709 color space
- *
- * Each work item converts 4 horizontally adjacent pixels: it loads 16 input bytes (four RGBA pixels,
- * the alpha bytes are not read into the computation) and stores 4 bytes to each of the Y, U and V planes.
- * U and V are offset by 128 after the float-to-short conversion and all three are clamped to [0, 255].
- * NOTE(review): the float-to-short conversions here carry no explicit rounding-mode suffix; this presumably
- * relies on the OpenCL C default for conversions to integer types (round toward zero) - confirm against the spec.
- *
- * Global Workgroup Size [ DIV_CEIL(width, 4), height ]
- * No offset.
- *
- * @param[in] rgba_input_ptr Pointer to the source image. Supported Format: U8
- * @param[in] rgba_input_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] rgba_input_step_x rgba_input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] rgba_input_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] rgba_input_step_y rgba_input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] rgba_input_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] luma_output_ptr Pointer to the destination luma channel. Supported Format: U8
- * @param[in] luma_output_stride_x Stride of the destination luma channel in X dimension (in bytes)
- * @param[in] luma_output_step_x luma_output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] luma_output_stride_y Stride of the destination image luma channel in Y dimension (in bytes)
- * @param[in] luma_output_step_y luma_output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] luma_output_offset_first_element_in_bytes The offset of the first element in the destination luma channel
- * @param[out] u_output_ptr Pointer to the destination U channel. Supported Format: U8
- * @param[in] u_output_stride_x Stride of the destination U channel in X dimension (in bytes)
- * @param[in] u_output_step_x u_output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] u_output_stride_y Stride of the destination image U channel in Y dimension (in bytes)
- * @param[in] u_output_step_y u_output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] u_output_offset_first_element_in_bytes The offset of the first element in the destination U channel
- * @param[out] v_output_ptr Pointer to the destination V channel. Supported Format: U8
- * @param[in] v_output_stride_x Stride of the destination V channel in X dimension (in bytes)
- * @param[in] v_output_step_x v_output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] v_output_stride_y Stride of the destination image V channel in Y dimension (in bytes)
- * @param[in] v_output_step_y v_output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] v_output_offset_first_element_in_bytes The offset of the first element in the destination V channel
- *
- */
-__kernel void RGBA8888_to_YUV444_bt709(
- IMAGE_DECLARATION(rgba_input),
- IMAGE_DECLARATION(luma_output),
- IMAGE_DECLARATION(u_output),
- IMAGE_DECLARATION(v_output))
-{
- // Handle 4 pixels per work item
- Image in_rgba = CONVERT_TO_IMAGE_STRUCT(rgba_input);
- Image out_y = CONVERT_TO_IMAGE_STRUCT(luma_output);
- Image out_u = CONVERT_TO_IMAGE_STRUCT(u_output);
- Image out_v = CONVERT_TO_IMAGE_STRUCT(v_output);
-
- // Read 4 RGBA pixels; only the R, G and B bytes of each pixel are extracted below
- uchar16 rgb_0 = vload16(0, in_rgba.ptr);
- uchar4 r_0 = (uchar4)(rgb_0.s0, rgb_0.s4, rgb_0.s8, rgb_0.sc);
- uchar4 g_0 = (uchar4)(rgb_0.s1, rgb_0.s5, rgb_0.s9, rgb_0.sd);
- uchar4 b_0 = (uchar4)(rgb_0.s2, rgb_0.s6, rgb_0.sa, rgb_0.se);
-
- float4 f_y = (float4)(0.0000f) + (float4)(0.2126f) * convert_float4(r_0) + (float4)(0.7152f) * convert_float4(g_0) + (float4)(0.0722f) * convert_float4(b_0);
- float4 f_u = (float4)(0.0000f) - (float4)(0.1146f) * convert_float4(r_0) - (float4)(0.3854f) * convert_float4(g_0) + (float4)(0.5000f) * convert_float4(b_0);
- float4 f_v = (float4)(0.0000f) + (float4)(0.5000f) * convert_float4(r_0) - (float4)(0.4542f) * convert_float4(g_0) - (float4)(0.0458f) * convert_float4(b_0);
-
- short4 i_y = convert_short4(f_y);
- short4 i_u = convert_short4(f_u) + (short4)(128);
- short4 i_v = convert_short4(f_v) + (short4)(128);
-
- uchar4 luma_0 = convert_uchar4_sat(max((short4)(0), min(i_y, (short4)(255))));
- vstore4(luma_0, 0, out_y.ptr);
-
- uchar4 cb_0 = convert_uchar4_sat(max((short4)(0), min(i_u, (short4)(255))));
- uchar4 cr_0 = convert_uchar4_sat(max((short4)(0), min(i_v, (short4)(255))));
- vstore4(cb_0, 0, out_u.ptr);
- vstore4(cr_0, 0, out_v.ptr);
-}
-
-/** Convert a RGBA image to NV12 using BT709 color space
- *
- * Each work item converts a 2x2 pixel block: it loads 8 bytes (two RGBA pixels, alpha is not used) from each
- * of two consecutive input rows, stores two luma bytes on each of two luma rows and stores one interleaved
- * Cb/Cr pair. Y, Cb and Cr are truncated and clamped to [0, 255] per pixel; the four clamped chroma values
- * of the block are then averaged with an integer division by 4.
- *
- * Global Workgroup Size [ DIV_CEIL(width, 2), height ]
- * No offset.
- *
- * @param[in] input_ptr Pointer to the source image. Supported Format: U8
- * @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] luma_output_ptr Pointer to the destination luma channel. Supported Format: U8
- * @param[in] luma_output_stride_x Stride of the destination luma channel in X dimension (in bytes)
- * @param[in] luma_output_step_x luma_output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] luma_output_stride_y Stride of the destination image luma channel in Y dimension (in bytes)
- * @param[in] luma_output_step_y luma_output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] luma_output_offset_first_element_in_bytes The offset of the first element in the destination image luma channel
- * @param[out] uv_output_ptr Pointer to the destination uv channel. Supported Format: U8
- * @param[in] uv_output_stride_x Stride of the destination uv channel in X dimension (in bytes)
- * @param[in] uv_output_step_x uv_output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] uv_output_stride_y Stride of the destination image uv channel in Y dimension (in bytes)
- * @param[in] uv_output_step_y uv_output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] uv_output_offset_first_element_in_bytes The offset of the first element in the destination image uv channel
- *
- */
-__kernel void RGBA8888_to_NV12_bt709(
- IMAGE_DECLARATION(input),
- IMAGE_DECLARATION(luma_output),
- IMAGE_DECLARATION(uv_output))
-{
- Image in = CONVERT_TO_IMAGE_STRUCT(input);
- Image out_y = CONVERT_TO_IMAGE_STRUCT(luma_output);
- Image out_uv = CONVERT_TO_IMAGE_STRUCT(uv_output);
-
- // Read 2 pixels of the first line (R, G, B taken from each 4-byte pixel, alpha skipped)
- uchar8 rgb_0 = vload8(0, in.ptr);
- uchar2 r_0 = (uchar2)(rgb_0.s0, rgb_0.s4);
- uchar2 g_0 = (uchar2)(rgb_0.s1, rgb_0.s5);
- uchar2 b_0 = (uchar2)(rgb_0.s2, rgb_0.s6);
-
- float2 f_y = (float2)(0.0000f) + (float2)(0.2126f) * convert_float2(r_0) + (float2)(0.7152f) * convert_float2(g_0) + (float2)(0.0722f) * convert_float2(b_0);
- float2 f_u = (float2)(0.0000f) - (float2)(0.1146f) * convert_float2(r_0) - (float2)(0.3854f) * convert_float2(g_0) + (float2)(0.5000f) * convert_float2(b_0);
- float2 f_v = (float2)(0.0000f) + (float2)(0.5000f) * convert_float2(r_0) - (float2)(0.4542f) * convert_float2(g_0) - (float2)(0.0458f) * convert_float2(b_0);
-
- short2 i_y = convert_short2_rtz(f_y);
- short2 i_u = convert_short2_rtz(f_u) + (short2)(128);
- short2 i_v = convert_short2_rtz(f_v) + (short2)(128);
-
- uchar2 luma_0 = convert_uchar2(max((short2)(0), min(i_y, (short2)(255))));
- vstore2(luma_0, 0, out_y.ptr);
-
- uchar2 cb_0 = convert_uchar2(max((short2)(0), min(i_u, (short2)(255))));
- uchar2 cr_0 = convert_uchar2(max((short2)(0), min(i_v, (short2)(255))));
-
- // Read 2 pixels of the second line (one input row stride further)
- uchar8 rgb_1 = vload8(0, in.ptr + input_stride_y);
- uchar2 r_1 = (uchar2)(rgb_1.s0, rgb_1.s4);
- uchar2 g_1 = (uchar2)(rgb_1.s1, rgb_1.s5);
- uchar2 b_1 = (uchar2)(rgb_1.s2, rgb_1.s6);
-
- f_y = (float2)(0.0000f) + (float2)(0.2126f) * convert_float2(r_1) + (float2)(0.7152f) * convert_float2(g_1) + (float2)(0.0722f) * convert_float2(b_1);
- f_u = (float2)(0.0000f) - (float2)(0.1146f) * convert_float2(r_1) - (float2)(0.3854f) * convert_float2(g_1) + (float2)(0.5000f) * convert_float2(b_1);
- f_v = (float2)(0.0000f) + (float2)(0.5000f) * convert_float2(r_1) - (float2)(0.4542f) * convert_float2(g_1) - (float2)(0.0458f) * convert_float2(b_1);
-
- i_y = convert_short2_rtz(f_y);
- i_u = convert_short2_rtz(f_u) + (short2)(128);
- i_v = convert_short2_rtz(f_v) + (short2)(128);
-
- uchar2 luma_1 = convert_uchar2(max((short2)(0), min(i_y, (short2)(255))));
- vstore2(luma_1, 0, out_y.ptr + luma_output_stride_y);
-
- uchar2 cb_1 = convert_uchar2(max((short2)(0), min(i_u, (short2)(255))));
- uchar2 cr_1 = convert_uchar2(max((short2)(0), min(i_v, (short2)(255))));
- uchar2 cbcr = (uchar2)(((cb_0.s0 + cb_0.s1 + cb_1.s0 + cb_1.s1) / 4),
- ((cr_0.s0 + cr_0.s1 + cr_1.s0 + cr_1.s1) / 4));
- vstore2(cbcr, 0, out_uv.ptr);
-}
-
-/** Convert a RGBA image to IYUV using BT709 color space
- *
- * Each work item converts a 2x2 pixel block: it loads 8 bytes (two RGBA pixels, alpha is not used) from each
- * of two consecutive input rows, stores two luma bytes on each of two luma rows and stores a single byte to
- * each of the U and V planes. Y, Cb and Cr are truncated and clamped to [0, 255] per pixel; the four clamped
- * chroma values of the block are then averaged with an integer division by 4.
- *
- * Global Workgroup Size [ DIV_CEIL(width, 2), height ]
- * No offset.
- *
- * @param[in] rgba_input_ptr Pointer to the source image. Supported Format: U8
- * @param[in] rgba_input_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] rgba_input_step_x rgba_input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] rgba_input_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] rgba_input_step_y rgba_input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] rgba_input_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] luma_output_ptr Pointer to the destination luma channel. Supported Format: U8
- * @param[in] luma_output_stride_x Stride of the destination luma channel in X dimension (in bytes)
- * @param[in] luma_output_step_x luma_output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] luma_output_stride_y Stride of the destination image luma channel in Y dimension (in bytes)
- * @param[in] luma_output_step_y luma_output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] luma_output_offset_first_element_in_bytes The offset of the first element in the destination luma channel
- * @param[out] u_output_ptr Pointer to the destination U channel. Supported Format: U8
- * @param[in] u_output_stride_x Stride of the destination U channel in X dimension (in bytes)
- * @param[in] u_output_step_x u_output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] u_output_stride_y Stride of the destination image U channel in Y dimension (in bytes)
- * @param[in] u_output_step_y u_output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] u_output_offset_first_element_in_bytes The offset of the first element in the destination U channel
- * @param[out] v_output_ptr Pointer to the destination V channel. Supported Format: U8
- * @param[in] v_output_stride_x Stride of the destination V channel in X dimension (in bytes)
- * @param[in] v_output_step_x v_output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] v_output_stride_y Stride of the destination V channel in Y dimension (in bytes)
- * @param[in] v_output_step_y v_output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] v_output_offset_first_element_in_bytes The offset of the first element in the destination V channel
- *
- */
-__kernel void RGBA8888_to_IYUV_bt709(
- IMAGE_DECLARATION(rgba_input),
- IMAGE_DECLARATION(luma_output),
- IMAGE_DECLARATION(u_output),
- IMAGE_DECLARATION(v_output))
-{
- // Handle 4 pixels per work item: two lines, 2 pixels on each line
- Image in_rgb = CONVERT_TO_IMAGE_STRUCT(rgba_input);
- Image out_y = CONVERT_TO_IMAGE_STRUCT(luma_output);
- Image out_u = CONVERT_TO_IMAGE_STRUCT(u_output);
- Image out_v = CONVERT_TO_IMAGE_STRUCT(v_output);
-
- // Read 2 pixels of the first line (R, G, B taken from each 4-byte pixel, alpha skipped)
- uchar8 rgb_0 = vload8(0, in_rgb.ptr);
- uchar2 r_0 = (uchar2)(rgb_0.s0, rgb_0.s4);
- uchar2 g_0 = (uchar2)(rgb_0.s1, rgb_0.s5);
- uchar2 b_0 = (uchar2)(rgb_0.s2, rgb_0.s6);
-
- float2 f_y = (float2)(0.0000f) + (float2)(0.2126f) * convert_float2(r_0) + (float2)(0.7152f) * convert_float2(g_0) + (float2)(0.0722f) * convert_float2(b_0);
- float2 f_u = (float2)(0.0000f) - (float2)(0.1146f) * convert_float2(r_0) - (float2)(0.3854f) * convert_float2(g_0) + (float2)(0.5000f) * convert_float2(b_0);
- float2 f_v = (float2)(0.0000f) + (float2)(0.5000f) * convert_float2(r_0) - (float2)(0.4542f) * convert_float2(g_0) - (float2)(0.0458f) * convert_float2(b_0);
-
- short2 i_y = convert_short2_rtz(f_y);
- short2 i_u = convert_short2_rtz(f_u) + (short2)(128);
- short2 i_v = convert_short2_rtz(f_v) + (short2)(128);
-
- uchar2 luma_0 = convert_uchar2(max((short2)(0), min(i_y, (short2)(255))));
- vstore2(luma_0, 0, out_y.ptr);
-
- uchar2 cb_0 = convert_uchar2(max((short2)(0), min(i_u, (short2)(255))));
- uchar2 cr_0 = convert_uchar2(max((short2)(0), min(i_v, (short2)(255))));
-
- // Read 2 pixels of the second line (one input row stride further)
- uchar8 rgb_1 = vload8(0, in_rgb.ptr + rgba_input_stride_y);
- uchar2 r_1 = (uchar2)(rgb_1.s0, rgb_1.s4);
- uchar2 g_1 = (uchar2)(rgb_1.s1, rgb_1.s5);
- uchar2 b_1 = (uchar2)(rgb_1.s2, rgb_1.s6);
-
- f_y = (float2)(0.0000f) + (float2)(0.2126f) * convert_float2(r_1) + (float2)(0.7152f) * convert_float2(g_1) + (float2)(0.0722f) * convert_float2(b_1);
- f_u = (float2)(0.0000f) - (float2)(0.1146f) * convert_float2(r_1) - (float2)(0.3854f) * convert_float2(g_1) + (float2)(0.5000f) * convert_float2(b_1);
- f_v = (float2)(0.0000f) + (float2)(0.5000f) * convert_float2(r_1) - (float2)(0.4542f) * convert_float2(g_1) - (float2)(0.0458f) * convert_float2(b_1);
-
- i_y = convert_short2_rtz(f_y);
- i_u = convert_short2_rtz(f_u) + (short2)(128);
- i_v = convert_short2_rtz(f_v) + (short2)(128);
-
- uchar2 luma_1 = convert_uchar2(max((short2)(0), min(i_y, (short2)(255))));
- vstore2(luma_1, 0, out_y.ptr + luma_output_stride_y);
-
- uchar2 cb_1 = convert_uchar2(max((short2)(0), min(i_u, (short2)(255))));
- uchar2 cr_1 = convert_uchar2(max((short2)(0), min(i_v, (short2)(255))));
- uchar2 cbcr = (uchar2)(((cb_0.s0 + cb_0.s1 + cb_1.s0 + cb_1.s1) / 4),
- ((cr_0.s0 + cr_0.s1 + cr_1.s0 + cr_1.s1) / 4));
- *out_u.ptr = cbcr.x;
- *out_v.ptr = cbcr.y;
-}
-
-/** Convert an NV12 image to RGBA8888
- *
- * Each work-item reads 4 luma values from each of two consecutive lines plus 4 interleaved chroma bytes (Cb, Cr, Cb, Cr),
- * and writes 4 RGBA pixels (16 bytes) to each of the two corresponding output lines. Alpha is always written as 255.
- *
- * Global Workgroup Size [ DIV_CEIL(width, 4), height ]
- * No offset.
- *
- * @param[in] luma_input_ptr Pointer to the source luma channel. Supported Format: U8
- * @param[in] luma_input_stride_x Stride of the luma image in X dimension (in bytes)
- * @param[in] luma_input_step_x luma_input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] luma_input_stride_y Stride of the source luma channel in Y dimension (in bytes)
- * @param[in] luma_input_step_y luma_input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] luma_input_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[in] uv_input_ptr Pointer to the source uv channel. Supported Format: U8
- * @param[in] uv_input_stride_x Stride of the source image uv channel in X dimension (in bytes)
- * @param[in] uv_input_step_x uv_input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] uv_input_stride_y Stride of the source image uv channel in Y dimension (in bytes)
- * @param[in] uv_input_step_y uv_input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] uv_input_offset_first_element_in_bytes The offset of the first element in the source uv channel
- * @param[out] rgb_output_ptr Pointer to the destination image. Supported Format: U8
- * @param[in] rgb_output_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] rgb_output_step_x rgb_output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] rgb_output_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] rgb_output_step_y rgb_output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] rgb_output_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void NV12_to_RGBA8888_bt709(
- IMAGE_DECLARATION(luma_input),
- IMAGE_DECLARATION(uv_input),
- IMAGE_DECLARATION(rgb_output))
-{
- Image in_luma = CONVERT_TO_IMAGE_STRUCT(luma_input);
- Image in_uv = CONVERT_TO_IMAGE_STRUCT(uv_input);
- Image out_rgb = CONVERT_TO_IMAGE_STRUCT(rgb_output);
-
- uchar4 luma_0 = vload4(0, in_luma.ptr); // 4 luma values of the first line
- uchar4 luma_1 = vload4(0, in_luma.ptr + luma_input_stride_y); // 4 luma values of the second line
- uchar4 cbcr = vload4(0, in_uv.ptr); // interleaved chroma, used below as Cb0 Cr0 Cb1 Cr1
- char4 cb = (char4)(cbcr.s0, cbcr.s0, cbcr.s2, cbcr.s2) - (char4)(128); // each Cb sample is reused for 2 adjacent pixels. NOTE(review): 128 does not fit in char; presumably relies on wrap-around - confirm
- char4 cr = (char4)(cbcr.s1, cbcr.s1, cbcr.s3, cbcr.s3) - (char4)(128); // each Cr sample is reused for 2 adjacent pixels
-
- float4 temp0 = (float4)(0.0000f) + (float4)(0.0000f) * convert_float4(cb) + (float4)(1.5748f) * convert_float4(cr); // chroma contribution to R
- float4 temp1 = (float4)(0.0000f) - (float4)(0.1873f) * convert_float4(cb) - (float4)(0.4681f) * convert_float4(cr); // chroma contribution to G
- float4 temp2 = (float4)(0.0000f) + (float4)(1.8556f) * convert_float4(cb) + (float4)(0.0000f) * convert_float4(cr); // chroma contribution to B
-
- float4 f_r = convert_float4(luma_0) + temp0;
- float4 f_g = convert_float4(luma_0) + temp1;
- float4 f_b = convert_float4(luma_0) + temp2;
-
- uchar4 r_0 = convert_uchar4_sat_rtz(f_r); // saturate to [0, 255], round toward zero
- uchar4 g_0 = convert_uchar4_sat_rtz(f_g);
- uchar4 b_0 = convert_uchar4_sat_rtz(f_b);
-
- uchar8 rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, 255, r_0.s1, g_0.s1, b_0.s1, 255); // pixels 0 and 1, alpha = 255
- uchar8 rgb_1 = (uchar8)(r_0.s2, g_0.s2, b_0.s2, 255, r_0.s3, g_0.s3, b_0.s3, 255); // pixels 2 and 3, alpha = 255
- vstore8(rgb_0, 0, out_rgb.ptr);
- vstore8(rgb_1, 0, out_rgb.ptr + 8);
-
- f_r = convert_float4(luma_1) + temp0; // second line reuses the same chroma contributions
- f_g = convert_float4(luma_1) + temp1;
- f_b = convert_float4(luma_1) + temp2;
-
- r_0 = convert_uchar4_sat_rtz(f_r);
- g_0 = convert_uchar4_sat_rtz(f_g);
- b_0 = convert_uchar4_sat_rtz(f_b);
-
- rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, 255, r_0.s1, g_0.s1, b_0.s1, 255);
- rgb_1 = (uchar8)(r_0.s2, g_0.s2, b_0.s2, 255, r_0.s3, g_0.s3, b_0.s3, 255);
- vstore8(rgb_0, 0, out_rgb.ptr + rgb_output_stride_y);
- vstore8(rgb_1, 0, out_rgb.ptr + rgb_output_stride_y + 8);
-}
-
-/** Convert an NV12 image to IYUV
- *
- * The kernel only rearranges bytes: luma is copied unchanged for two lines and the interleaved chroma bytes are
- * split into separate U and V outputs. No colour arithmetic is performed.
- *
- * Global Workgroup Size [ DIV_CEIL(width, 16), height ]
- * No offset.
- *
- * @param[in] luma_input_ptr Pointer to the source luma channel. Supported Format: U8
- * @param[in] luma_input_stride_x Stride of the luma image in X dimension (in bytes)
- * @param[in] luma_input_step_x luma_input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] luma_input_stride_y Stride of the source luma channel in Y dimension (in bytes)
- * @param[in] luma_input_step_y luma_input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] luma_input_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[in] uv_input_ptr Pointer to the source uv channel. Supported Format: U8
- * @param[in] uv_input_stride_x Stride of the source image uv channel in X dimension (in bytes)
- * @param[in] uv_input_step_x uv_input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] uv_input_stride_y Stride of the source image uv channel in Y dimension (in bytes)
- * @param[in] uv_input_step_y uv_input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] uv_input_offset_first_element_in_bytes The offset of the first element in the source uv channel
- * @param[out] luma_output_ptr Pointer to the destination luma channel. Supported Format: U8
- * @param[in] luma_output_stride_x Stride of the destination luma channel in X dimension (in bytes)
- * @param[in] luma_output_step_x luma_output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] luma_output_stride_y Stride of the destination image luma channel in Y dimension (in bytes)
- * @param[in] luma_output_step_y luma_output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] luma_output_offset_first_element_in_bytes The offset of the first element in the destination luma channel
- * @param[out] u_output_ptr Pointer to the destination U channel. Supported Format: U8
- * @param[in] u_output_stride_x Stride of the destination U channel in X dimension (in bytes)
- * @param[in] u_output_step_x u_output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] u_output_stride_y Stride of the destination image U channel in Y dimension (in bytes)
- * @param[in] u_output_step_y u_output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] u_output_offset_first_element_in_bytes The offset of the first element in the destination U channel
- * @param[out] v_output_ptr Pointer to the destination V channel. Supported Format: U8
- * @param[in] v_output_stride_x Stride of the destination V channel in X dimension (in bytes)
- * @param[in] v_output_step_x v_output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] v_output_stride_y Stride of the destination V channel in Y dimension (in bytes)
- * @param[in] v_output_step_y v_output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] v_output_offset_first_element_in_bytes The offset of the first element in the destination V channel
- */
-__kernel void NV12_to_IYUV_bt709(
- IMAGE_DECLARATION(luma_input),
- IMAGE_DECLARATION(uv_input),
- IMAGE_DECLARATION(luma_output),
- IMAGE_DECLARATION(u_output),
- IMAGE_DECLARATION(v_output))
-{
- Image in_y = CONVERT_TO_IMAGE_STRUCT(luma_input);
- Image in_uv = CONVERT_TO_IMAGE_STRUCT(uv_input);
- Image out_y = CONVERT_TO_IMAGE_STRUCT(luma_output);
- Image out_u = CONVERT_TO_IMAGE_STRUCT(u_output);
- Image out_v = CONVERT_TO_IMAGE_STRUCT(v_output);
-
- // Handle 32 pixels per work-item: two lines of 16 pixels each, sharing 8 Cb/Cr pairs
- uchar16 luma_0 = vload16(0, in_y.ptr);
- uchar16 luma_1 = vload16(0, in_y.ptr + luma_input_stride_y);
- uchar16 cbcr = vload16(0, in_uv.ptr); // 16 interleaved chroma bytes
- uchar8 cb = (uchar8)(cbcr.s0, cbcr.s2, cbcr.s4, cbcr.s6, cbcr.s8, cbcr.sa, cbcr.sc, cbcr.se); // even bytes are taken as Cb (U)
- uchar8 cr = (uchar8)(cbcr.s1, cbcr.s3, cbcr.s5, cbcr.s7, cbcr.s9, cbcr.sb, cbcr.sd, cbcr.sf); // odd bytes are taken as Cr (V)
-
- vstore16(luma_0, 0, out_y.ptr); // luma is copied unchanged
- vstore16(luma_1, 0, out_y.ptr + luma_output_stride_y);
- vstore8(cb, 0, out_u.ptr); // 8 U bytes
- vstore8(cr, 0, out_v.ptr); // 8 V bytes
-}
-
-/** Convert an NV12 image to YUV444
- *
- * The kernel only rearranges bytes: luma is copied unchanged for two lines, and every chroma sample is duplicated
- * to two adjacent columns and written to two consecutive lines of the U and V outputs. No colour arithmetic is performed.
- *
- * Global Workgroup Size [ DIV_CEIL(width, 16), height ]
- * No offset.
- *
- * @param[in] luma_input_ptr Pointer to the source luma channel. Supported Format: U8
- * @param[in] luma_input_stride_x Stride of the luma image in X dimension (in bytes)
- * @param[in] luma_input_step_x luma_input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] luma_input_stride_y Stride of the source luma channel in Y dimension (in bytes)
- * @param[in] luma_input_step_y luma_input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] luma_input_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[in] uv_input_ptr Pointer to the source uv channel. Supported Format: U8
- * @param[in] uv_input_stride_x Stride of the source image uv channel in X dimension (in bytes)
- * @param[in] uv_input_step_x uv_input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] uv_input_stride_y Stride of the source image uv channel in Y dimension (in bytes)
- * @param[in] uv_input_step_y uv_input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] uv_input_offset_first_element_in_bytes The offset of the first element in the source uv channel
- * @param[out] luma_output_ptr Pointer to the destination luma channel. Supported Format: U8
- * @param[in] luma_output_stride_x Stride of the destination luma channel in X dimension (in bytes)
- * @param[in] luma_output_step_x luma_output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] luma_output_stride_y Stride of the destination image luma channel in Y dimension (in bytes)
- * @param[in] luma_output_step_y luma_output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] luma_output_offset_first_element_in_bytes The offset of the first element in the destination luma channel
- * @param[out] u_output_ptr Pointer to the destination U channel. Supported Format: U8
- * @param[in] u_output_stride_x Stride of the destination U channel in X dimension (in bytes)
- * @param[in] u_output_step_x u_output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] u_output_stride_y Stride of the destination image U channel in Y dimension (in bytes)
- * @param[in] u_output_step_y u_output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] u_output_offset_first_element_in_bytes The offset of the first element in the destination U channel
- * @param[out] v_output_ptr Pointer to the destination V channel. Supported Format: U8
- * @param[in] v_output_stride_x Stride of the destination V channel in X dimension (in bytes)
- * @param[in] v_output_step_x v_output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] v_output_stride_y Stride of the destination V channel in Y dimension (in bytes)
- * @param[in] v_output_step_y v_output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] v_output_offset_first_element_in_bytes The offset of the first element in the destination V channel
- */
-__kernel void NV12_to_YUV444_bt709(
- IMAGE_DECLARATION(luma_input),
- IMAGE_DECLARATION(uv_input),
- IMAGE_DECLARATION(luma_output),
- IMAGE_DECLARATION(u_output),
- IMAGE_DECLARATION(v_output))
-{
- Image in_y = CONVERT_TO_IMAGE_STRUCT(luma_input);
- Image in_uv = CONVERT_TO_IMAGE_STRUCT(uv_input);
- Image out_y = CONVERT_TO_IMAGE_STRUCT(luma_output);
- Image out_u = CONVERT_TO_IMAGE_STRUCT(u_output);
- Image out_v = CONVERT_TO_IMAGE_STRUCT(v_output);
-
- // Handle 32 pixels per work-item: two lines of 16 pixels each, sharing 8 Cb/Cr pairs
- uchar16 luma_0 = vload16(0, in_y.ptr);
- uchar16 luma_1 = vload16(0, in_y.ptr + luma_input_stride_y);
- uchar16 cbcr = vload16(0, in_uv.ptr); // 16 interleaved chroma bytes
- uchar16 cb = (uchar16)(cbcr.s0, cbcr.s0, cbcr.s2, cbcr.s2, cbcr.s4, cbcr.s4, cbcr.s6, cbcr.s6, cbcr.s8, cbcr.s8,
- cbcr.sa, cbcr.sa, cbcr.sc, cbcr.sc, cbcr.se, cbcr.se); // even bytes (Cb), each duplicated for 2 adjacent pixels
- uchar16 cr = (uchar16)(cbcr.s1, cbcr.s1, cbcr.s3, cbcr.s3, cbcr.s5, cbcr.s5, cbcr.s7, cbcr.s7, cbcr.s9, cbcr.s9,
- cbcr.sb, cbcr.sb, cbcr.sd, cbcr.sd, cbcr.sf, cbcr.sf); // odd bytes (Cr), each duplicated for 2 adjacent pixels
-
- vstore16(luma_0, 0, out_y.ptr); // luma is copied unchanged
- vstore16(luma_1, 0, out_y.ptr + luma_output_stride_y);
- vstore16(cb, 0, out_u.ptr); // the same chroma row is written to both output lines
- vstore16(cb, 0, out_u.ptr + u_output_stride_y);
- vstore16(cr, 0, out_v.ptr);
- vstore16(cr, 0, out_v.ptr + v_output_stride_y);
-}
-
-/** Convert an NV21 image to RGB888
- *
- * Each work-item reads 4 luma values from each of two consecutive lines plus 4 interleaved chroma bytes (Cr, Cb, Cr, Cb),
- * and writes 4 packed RGB pixels (12 bytes) to each of the two corresponding output lines.
- *
- * Global Workgroup Size [ DIV_CEIL(width, 4), height ]
- * No offset.
- *
- * @param[in] luma_input_ptr Pointer to the source luma channel. Supported Format: U8
- * @param[in] luma_input_stride_x Stride of the luma image in X dimension (in bytes)
- * @param[in] luma_input_step_x luma_input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] luma_input_stride_y Stride of the source luma channel in Y dimension (in bytes)
- * @param[in] luma_input_step_y luma_input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] luma_input_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[in] uv_input_ptr Pointer to the source uv channel. Supported Format: U8
- * @param[in] uv_input_stride_x Stride of the source image uv channel in X dimension (in bytes)
- * @param[in] uv_input_step_x uv_input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] uv_input_stride_y Stride of the source image uv channel in Y dimension (in bytes)
- * @param[in] uv_input_step_y uv_input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] uv_input_offset_first_element_in_bytes The offset of the first element in the source uv channel
- * @param[out] rgb_output_ptr Pointer to the destination image. Supported Format: U8
- * @param[in] rgb_output_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] rgb_output_step_x rgb_output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] rgb_output_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] rgb_output_step_y rgb_output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] rgb_output_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void NV21_to_RGB888_bt709(
- IMAGE_DECLARATION(luma_input),
- IMAGE_DECLARATION(uv_input),
- IMAGE_DECLARATION(rgb_output))
-{
- Image in_y = CONVERT_TO_IMAGE_STRUCT(luma_input);
- Image in_uv = CONVERT_TO_IMAGE_STRUCT(uv_input);
- Image out_rgb = CONVERT_TO_IMAGE_STRUCT(rgb_output);
-
- // Handle 8 pixels per work-item: two lines of 4 pixels each, sharing 2 Cr/Cb pairs
- uchar4 luma_0 = vload4(0, in_y.ptr);
- uchar4 luma_1 = vload4(0, in_y.ptr + luma_input_stride_y);
- uchar4 cbcr = vload4(0, in_uv.ptr); // interleaved chroma, used below as Cr0 Cb0 Cr1 Cb1 (Cr first)
- char4 cr = (char4)(cbcr.s0, cbcr.s0, cbcr.s2, cbcr.s2) - (char4)(128); // each Cr sample is reused for 2 adjacent pixels. NOTE(review): 128 does not fit in char; presumably relies on wrap-around - confirm
- char4 cb = (char4)(cbcr.s1, cbcr.s1, cbcr.s3, cbcr.s3) - (char4)(128); // each Cb sample is reused for 2 adjacent pixels
-
- float4 temp0 = (float4)(0.0000f) + (float4)(0.0000f) * convert_float4(cb) + (float4)(1.5748f) * convert_float4(cr); // chroma contribution to R
- float4 temp1 = (float4)(0.0000f) - (float4)(0.1873f) * convert_float4(cb) - (float4)(0.4681f) * convert_float4(cr); // chroma contribution to G
- float4 temp2 = (float4)(0.0000f) + (float4)(1.8556f) * convert_float4(cb) + (float4)(0.0000f) * convert_float4(cr); // chroma contribution to B
-
- float4 f_r = convert_float4(luma_0) + temp0;
- float4 f_g = convert_float4(luma_0) + temp1;
- float4 f_b = convert_float4(luma_0) + temp2;
-
- uchar4 r_0 = convert_uchar4_sat_rtz(f_r); // saturate to [0, 255], round toward zero
- uchar4 g_0 = convert_uchar4_sat_rtz(f_g);
- uchar4 b_0 = convert_uchar4_sat_rtz(f_b);
-
- uchar8 rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, r_0.s1, g_0.s1, b_0.s1, r_0.s2, g_0.s2); // bytes 0-7: pixels 0, 1 and R,G of pixel 2
- uchar4 rgb_1 = (uchar4)(b_0.s2, r_0.s3, g_0.s3, b_0.s3); // bytes 8-11: B of pixel 2 and pixel 3
- vstore8(rgb_0, 0, out_rgb.ptr);
- vstore4(rgb_1, 0, out_rgb.ptr + 8);
-
- f_r = convert_float4(luma_1) + temp0; // second line reuses the same chroma contributions
- f_g = convert_float4(luma_1) + temp1;
- f_b = convert_float4(luma_1) + temp2;
-
- r_0 = convert_uchar4_sat_rtz(f_r);
- g_0 = convert_uchar4_sat_rtz(f_g);
- b_0 = convert_uchar4_sat_rtz(f_b);
-
- rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, r_0.s1, g_0.s1, b_0.s1, r_0.s2, g_0.s2);
- rgb_1 = (uchar4)(b_0.s2, r_0.s3, g_0.s3, b_0.s3);
- vstore8(rgb_0, 0, out_rgb.ptr + rgb_output_stride_y);
- vstore4(rgb_1, 0, out_rgb.ptr + rgb_output_stride_y + 8);
-}
-
-/** Convert an NV21 image to RGBA8888
- *
- * Each work-item reads 4 luma values from each of two consecutive lines plus 4 interleaved chroma bytes (Cr, Cb, Cr, Cb),
- * and writes 4 RGBA pixels (16 bytes) to each of the two corresponding output lines. Alpha is always written as 255.
- *
- * Global Workgroup Size [ DIV_CEIL(width, 4), height ]
- * No offset.
- *
- * @param[in] luma_input_ptr Pointer to the source luma channel. Supported Format: U8
- * @param[in] luma_input_stride_x Stride of the luma image in X dimension (in bytes)
- * @param[in] luma_input_step_x luma_input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] luma_input_stride_y Stride of the source luma channel in Y dimension (in bytes)
- * @param[in] luma_input_step_y luma_input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] luma_input_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[in] uv_input_ptr Pointer to the source uv channel. Supported Format: U8
- * @param[in] uv_input_stride_x Stride of the source image uv channel in X dimension (in bytes)
- * @param[in] uv_input_step_x uv_input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] uv_input_stride_y Stride of the source image uv channel in Y dimension (in bytes)
- * @param[in] uv_input_step_y uv_input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] uv_input_offset_first_element_in_bytes The offset of the first element in the source uv channel
- * @param[out] rgba_output_ptr Pointer to the destination image. Supported Format: U8
- * @param[in] rgba_output_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] rgba_output_step_x rgba_output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] rgba_output_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] rgba_output_step_y rgba_output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] rgba_output_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void NV21_to_RGBA8888_bt709(
- IMAGE_DECLARATION(luma_input),
- IMAGE_DECLARATION(uv_input),
- IMAGE_DECLARATION(rgba_output))
-{
- Image in_luma = CONVERT_TO_IMAGE_STRUCT(luma_input);
- Image in_uv = CONVERT_TO_IMAGE_STRUCT(uv_input);
- Image out_rgb = CONVERT_TO_IMAGE_STRUCT(rgba_output);
-
- // Handle 8 pixels per work-item: two lines of 4 pixels each, sharing 2 Cr/Cb pairs
- uchar4 luma_0 = vload4(0, in_luma.ptr);
- uchar4 luma_1 = vload4(0, in_luma.ptr + luma_input_stride_y);
- uchar4 cbcr = vload4(0, in_uv.ptr); // interleaved chroma, used below as Cr0 Cb0 Cr1 Cb1 (Cr first)
- char4 cr = (char4)(cbcr.s0, cbcr.s0, cbcr.s2, cbcr.s2) - (char4)(128); // each Cr sample is reused for 2 adjacent pixels. NOTE(review): 128 does not fit in char; presumably relies on wrap-around - confirm
- char4 cb = (char4)(cbcr.s1, cbcr.s1, cbcr.s3, cbcr.s3) - (char4)(128); // each Cb sample is reused for 2 adjacent pixels
-
- float4 temp0 = (float4)(0.0000f) + (float4)(0.0000f) * convert_float4(cb) + (float4)(1.5748f) * convert_float4(cr); // chroma contribution to R
- float4 temp1 = (float4)(0.0000f) - (float4)(0.1873f) * convert_float4(cb) - (float4)(0.4681f) * convert_float4(cr); // chroma contribution to G
- float4 temp2 = (float4)(0.0000f) + (float4)(1.8556f) * convert_float4(cb) + (float4)(0.0000f) * convert_float4(cr); // chroma contribution to B
-
- float4 f_r = convert_float4(luma_0) + temp0;
- float4 f_g = convert_float4(luma_0) + temp1;
- float4 f_b = convert_float4(luma_0) + temp2;
-
- uchar4 r_0 = convert_uchar4_sat_rtz(f_r); // saturate to [0, 255], round toward zero
- uchar4 g_0 = convert_uchar4_sat_rtz(f_g);
- uchar4 b_0 = convert_uchar4_sat_rtz(f_b);
-
- uchar8 rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, 255, r_0.s1, g_0.s1, b_0.s1, 255); // pixels 0 and 1, alpha = 255
- uchar8 rgb_1 = (uchar8)(r_0.s2, g_0.s2, b_0.s2, 255, r_0.s3, g_0.s3, b_0.s3, 255); // pixels 2 and 3, alpha = 255
- vstore8(rgb_0, 0, out_rgb.ptr);
- vstore8(rgb_1, 0, out_rgb.ptr + 8);
-
- f_r = convert_float4(luma_1) + temp0; // second line reuses the same chroma contributions
- f_g = convert_float4(luma_1) + temp1;
- f_b = convert_float4(luma_1) + temp2;
-
- r_0 = convert_uchar4_sat_rtz(f_r);
- g_0 = convert_uchar4_sat_rtz(f_g);
- b_0 = convert_uchar4_sat_rtz(f_b);
-
- rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, 255, r_0.s1, g_0.s1, b_0.s1, 255);
- rgb_1 = (uchar8)(r_0.s2, g_0.s2, b_0.s2, 255, r_0.s3, g_0.s3, b_0.s3, 255);
- vstore8(rgb_0, 0, out_rgb.ptr + rgba_output_stride_y);
- vstore8(rgb_1, 0, out_rgb.ptr + rgba_output_stride_y + 8);
-}
-
-/** Convert an NV21 image to YUV444
- *
- * The kernel only rearranges bytes: luma is copied unchanged for two lines, and every chroma sample is duplicated
- * to two adjacent columns and written to two consecutive lines of the U and V outputs. No colour arithmetic is performed.
- * The first byte of each interleaved chroma pair is treated as Cr (V) and the second as Cb (U).
- *
- * Global Workgroup Size [ DIV_CEIL(width, 16), height ]
- * No offset.
- *
- * @param[in] luma_input_ptr Pointer to the source luma channel. Supported Format: U8
- * @param[in] luma_input_stride_x Stride of the luma image in X dimension (in bytes)
- * @param[in] luma_input_step_x luma_input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] luma_input_stride_y Stride of the source luma channel in Y dimension (in bytes)
- * @param[in] luma_input_step_y luma_input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] luma_input_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[in] uv_input_ptr Pointer to the source uv channel. Supported Format: U8
- * @param[in] uv_input_stride_x Stride of the source image uv channel in X dimension (in bytes)
- * @param[in] uv_input_step_x uv_input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] uv_input_stride_y Stride of the source image uv channel in Y dimension (in bytes)
- * @param[in] uv_input_step_y uv_input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] uv_input_offset_first_element_in_bytes The offset of the first element in the source uv channel
- * @param[out] luma_output_ptr Pointer to the destination luma channel. Supported Format: U8
- * @param[in] luma_output_stride_x Stride of the destination luma channel in X dimension (in bytes)
- * @param[in] luma_output_step_x luma_output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] luma_output_stride_y Stride of the destination image luma channel in Y dimension (in bytes)
- * @param[in] luma_output_step_y luma_output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] luma_output_offset_first_element_in_bytes The offset of the first element in the destination luma channel
- * @param[out] u_output_ptr Pointer to the destination U channel. Supported Format: U8
- * @param[in] u_output_stride_x Stride of the destination U channel in X dimension (in bytes)
- * @param[in] u_output_step_x u_output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] u_output_stride_y Stride of the destination image U channel in Y dimension (in bytes)
- * @param[in] u_output_step_y u_output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] u_output_offset_first_element_in_bytes The offset of the first element in the destination U channel
- * @param[out] v_output_ptr Pointer to the destination V channel. Supported Format: U8
- * @param[in] v_output_stride_x Stride of the destination V channel in X dimension (in bytes)
- * @param[in] v_output_step_x v_output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] v_output_stride_y Stride of the destination V channel in Y dimension (in bytes)
- * @param[in] v_output_step_y v_output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] v_output_offset_first_element_in_bytes The offset of the first element in the destination V channel
- */
-__kernel void NV21_to_YUV444_bt709(
- IMAGE_DECLARATION(luma_input),
- IMAGE_DECLARATION(uv_input),
- IMAGE_DECLARATION(luma_output),
- IMAGE_DECLARATION(u_output),
- IMAGE_DECLARATION(v_output))
-{
- Image in_y = CONVERT_TO_IMAGE_STRUCT(luma_input);
- Image in_uv = CONVERT_TO_IMAGE_STRUCT(uv_input);
- Image out_y = CONVERT_TO_IMAGE_STRUCT(luma_output);
- Image out_u = CONVERT_TO_IMAGE_STRUCT(u_output);
- Image out_v = CONVERT_TO_IMAGE_STRUCT(v_output);
-
- // Handle 32 pixels per work-item: two lines of 16 pixels each, sharing 8 Cr/Cb pairs
- uchar16 luma_0 = vload16(0, in_y.ptr);
- uchar16 luma_1 = vload16(0, in_y.ptr + luma_input_stride_y);
- uchar16 cbcr = vload16(0, in_uv.ptr); // 16 interleaved chroma bytes (Cr first, despite the variable name)
- uchar16 cr = (uchar16)(cbcr.s0, cbcr.s0, cbcr.s2, cbcr.s2, cbcr.s4, cbcr.s4, cbcr.s6, cbcr.s6, cbcr.s8, cbcr.s8,
- cbcr.sa, cbcr.sa, cbcr.sc, cbcr.sc, cbcr.se, cbcr.se); // even bytes (Cr), each duplicated for 2 adjacent pixels
- uchar16 cb = (uchar16)(cbcr.s1, cbcr.s1, cbcr.s3, cbcr.s3, cbcr.s5, cbcr.s5, cbcr.s7, cbcr.s7, cbcr.s9, cbcr.s9,
- cbcr.sb, cbcr.sb, cbcr.sd, cbcr.sd, cbcr.sf, cbcr.sf); // odd bytes (Cb), each duplicated for 2 adjacent pixels
-
- vstore16(luma_0, 0, out_y.ptr); // luma is copied unchanged
- vstore16(luma_1, 0, out_y.ptr + luma_output_stride_y);
- vstore16(cb, 0, out_u.ptr); // the same chroma row is written to both output lines
- vstore16(cb, 0, out_u.ptr + u_output_stride_y);
- vstore16(cr, 0, out_v.ptr);
- vstore16(cr, 0, out_v.ptr + v_output_stride_y);
-}
-
-/** Convert an NV21 image to IYUV
- *
- * The kernel only rearranges bytes: luma is copied unchanged for two lines and the interleaved chroma bytes are
- * split into separate U and V outputs. No colour arithmetic is performed.
- * The first byte of each interleaved chroma pair is treated as Cr (V) and the second as Cb (U).
- *
- * Global Workgroup Size [ DIV_CEIL(width, 16), height ]
- * No offset.
- *
- * @param[in] luma_input_ptr Pointer to the source luma channel. Supported Format: U8
- * @param[in] luma_input_stride_x Stride of the luma image in X dimension (in bytes)
- * @param[in] luma_input_step_x luma_input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] luma_input_stride_y Stride of the source luma channel in Y dimension (in bytes)
- * @param[in] luma_input_step_y luma_input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] luma_input_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[in] uv_input_ptr Pointer to the source uv channel. Supported Format: U8
- * @param[in] uv_input_stride_x Stride of the source image uv channel in X dimension (in bytes)
- * @param[in] uv_input_step_x uv_input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] uv_input_stride_y Stride of the source image uv channel in Y dimension (in bytes)
- * @param[in] uv_input_step_y uv_input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] uv_input_offset_first_element_in_bytes The offset of the first element in the source uv channel
- * @param[out] luma_output_ptr Pointer to the destination luma channel. Supported Format: U8
- * @param[in] luma_output_stride_x Stride of the destination luma channel in X dimension (in bytes)
- * @param[in] luma_output_step_x luma_output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] luma_output_stride_y Stride of the destination image luma channel in Y dimension (in bytes)
- * @param[in] luma_output_step_y luma_output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] luma_output_offset_first_element_in_bytes The offset of the first element in the destination luma channel
- * @param[out] u_output_ptr Pointer to the destination U channel. Supported Format: U8
- * @param[in] u_output_stride_x Stride of the destination U channel in X dimension (in bytes)
- * @param[in] u_output_step_x u_output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] u_output_stride_y Stride of the destination image U channel in Y dimension (in bytes)
- * @param[in] u_output_step_y u_output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] u_output_offset_first_element_in_bytes The offset of the first element in the destination U channel
- * @param[out] v_output_ptr Pointer to the destination V channel. Supported Format: U8
- * @param[in] v_output_stride_x Stride of the destination V channel in X dimension (in bytes)
- * @param[in] v_output_step_x v_output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] v_output_stride_y Stride of the destination V channel in Y dimension (in bytes)
- * @param[in] v_output_step_y v_output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] v_output_offset_first_element_in_bytes The offset of the first element in the destination V channel
- */
-__kernel void NV21_to_IYUV_bt709(
- IMAGE_DECLARATION(luma_input),
- IMAGE_DECLARATION(uv_input),
- IMAGE_DECLARATION(luma_output),
- IMAGE_DECLARATION(u_output),
- IMAGE_DECLARATION(v_output))
-{
- Image in_y = CONVERT_TO_IMAGE_STRUCT(luma_input);
- Image in_uv = CONVERT_TO_IMAGE_STRUCT(uv_input);
- Image out_y = CONVERT_TO_IMAGE_STRUCT(luma_output);
- Image out_u = CONVERT_TO_IMAGE_STRUCT(u_output);
- Image out_v = CONVERT_TO_IMAGE_STRUCT(v_output);
-
- uchar16 luma_0 = vload16(0, in_y.ptr); // 16 luma values of the first line
- uchar16 luma_1 = vload16(0, in_y.ptr + luma_input_stride_y); // 16 luma values of the second line
- uchar16 cbcr = vload16(0, in_uv.ptr); // 16 interleaved chroma bytes (Cr first, despite the variable name)
- uchar8 cr = (uchar8)(cbcr.s0, cbcr.s2, cbcr.s4, cbcr.s6, cbcr.s8, cbcr.sa, cbcr.sc, cbcr.se); // even bytes are taken as Cr (V)
- uchar8 cb = (uchar8)(cbcr.s1, cbcr.s3, cbcr.s5, cbcr.s7, cbcr.s9, cbcr.sb, cbcr.sd, cbcr.sf); // odd bytes are taken as Cb (U)
-
- vstore16(luma_0, 0, out_y.ptr); // luma is copied unchanged
- vstore16(luma_1, 0, out_y.ptr + luma_output_stride_y);
- vstore8(cb, 0, out_u.ptr); // 8 U bytes
- vstore8(cr, 0, out_v.ptr); // 8 V bytes
-}
-
-/** Convert a UYVY image to IYUV using BT709 color space
- *
- * Global Workgroup Size [ DIV_CEIL(width, 8), height ]
- * No offset.
- *
- * @param[in] uyvy_input_ptr Pointer to the source image. Supported Format: U8
- * @param[in] uyvy_input_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] uyvy_input_step_x uyvy_input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] uyvy_input_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] uyvy_input_step_y uyvy_input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] uyvy_input_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] luma_output_ptr Pointer to the destination luma channel. Supported Format: U8
- * @param[in] luma_output_stride_x Stride of the destination luma channel in X dimension (in bytes)
- * @param[in] luma_output_step_x luma_output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] luma_output_stride_y Stride of the destination image luma channel in Y dimension (in bytes)
- * @param[in] luma_output_step_y luma_output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] luma_output_offset_first_element_in_bytes The offset of the first element in the destination luma channel
- * @param[out] u_output_ptr Pointer to the destination U channel. Supported Format: U8
- * @param[in] u_output_stride_x Stride of the destination U channel in X dimension (in bytes)
- * @param[in] u_output_step_x u_output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] u_output_stride_y Stride of the destination image U channel in Y dimension (in bytes)
- * @param[in] u_output_step_y u_output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] u_output_offset_first_element_in_bytes The offset of the first element in the destination U channel
- * @param[out] v_output_ptr Pointer to the destination V channel. Supported Format: U8
- * @param[in] v_output_stride_x Stride of the destination V channel in X dimension (in bytes)
- * @param[in] v_output_step_x v_output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] v_output_stride_y Stride of the destination V channel in Y dimension (in bytes)
- * @param[in] v_output_step_y v_output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] v_output_offset_first_element_in_bytes The offset of the first element in the destination V channel
- *
- */
-__kernel void UYVY422_to_IYUV_bt709(
-    IMAGE_DECLARATION(uyvy_input),
-    IMAGE_DECLARATION(luma_output),
-    IMAGE_DECLARATION(u_output),
-    IMAGE_DECLARATION(v_output))
-{
-    Image src   = CONVERT_TO_IMAGE_STRUCT(uyvy_input);
-    Image dst_y = CONVERT_TO_IMAGE_STRUCT(luma_output);
-    Image dst_u = CONVERT_TO_IMAGE_STRUCT(u_output);
-    Image dst_v = CONVERT_TO_IMAGE_STRUCT(v_output);
-
-    // Each work-item reads 16 bytes (8 pixels) from each of two consecutive rows.
-    // Odd lanes are treated as luma, lanes 0/4/8/c as Cb and lanes 2/6/a/e as Cr.
-    const uchar16 row0    = vload16(0, src.ptr);
-    const ushort4 cb_row0 = convert_ushort4(row0.s048c);
-    const ushort4 cr_row0 = convert_ushort4(row0.s26ae);
-    vstore8(row0.odd, 0, dst_y.ptr);
-
-    const uchar16 row1    = vload16(0, src.ptr + uyvy_input_stride_y);
-    const ushort4 cb_row1 = convert_ushort4(row1.s048c);
-    const ushort4 cr_row1 = convert_ushort4(row1.s26ae);
-    vstore8(row1.odd, 0, dst_y.ptr + luma_output_stride_y);
-
-    // Truncating average of the two rows; the sum is formed in 16 bits so it cannot overflow.
-    const uchar4 cb_avg = convert_uchar4((cb_row0 + cb_row1) / (ushort4)(2));
-    const uchar4 cr_avg = convert_uchar4((cr_row0 + cr_row1) / (ushort4)(2));
-    vstore4(cb_avg, 0, dst_u.ptr);
-    vstore4(cr_avg, 0, dst_v.ptr);
-}
-
-/** Convert a YUYV image to IYUV using BT709 color space
- *
- * Global Workgroup Size [ DIV_CEIL(width, 8), height ]
- * No offset.
- *
- * @param[in] yuyv_input_ptr Pointer to the source image. Supported Format: U8
- * @param[in] yuyv_input_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] yuyv_input_step_x yuyv_input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] yuyv_input_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] yuyv_input_step_y yuyv_input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] yuyv_input_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] luma_output_ptr Pointer to the destination luma channel. Supported Format: U8
- * @param[in] luma_output_stride_x Stride of the destination luma channel in X dimension (in bytes)
- * @param[in] luma_output_step_x luma_output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] luma_output_stride_y Stride of the destination image luma channel in Y dimension (in bytes)
- * @param[in] luma_output_step_y luma_output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] luma_output_offset_first_element_in_bytes The offset of the first element in the destination luma channel
- * @param[out] u_output_ptr Pointer to the destination U channel. Supported Format: U8
- * @param[in] u_output_stride_x Stride of the destination U channel in X dimension (in bytes)
- * @param[in] u_output_step_x u_output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] u_output_stride_y Stride of the destination image U channel in Y dimension (in bytes)
- * @param[in] u_output_step_y u_output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] u_output_offset_first_element_in_bytes The offset of the first element in the destination U channel
- * @param[out] v_output_ptr Pointer to the destination V channel. Supported Format: U8
- * @param[in] v_output_stride_x Stride of the destination V channel in X dimension (in bytes)
- * @param[in] v_output_step_x v_output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] v_output_stride_y Stride of the destination V channel in Y dimension (in bytes)
- * @param[in] v_output_step_y v_output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] v_output_offset_first_element_in_bytes The offset of the first element in the destination V channel
- *
- */
-__kernel void YUYV422_to_IYUV_bt709(
-    IMAGE_DECLARATION(yuyv_input),
-    IMAGE_DECLARATION(luma_output),
-    IMAGE_DECLARATION(u_output),
-    IMAGE_DECLARATION(v_output))
-{
-    Image src   = CONVERT_TO_IMAGE_STRUCT(yuyv_input);
-    Image dst_y = CONVERT_TO_IMAGE_STRUCT(luma_output);
-    Image dst_u = CONVERT_TO_IMAGE_STRUCT(u_output);
-    Image dst_v = CONVERT_TO_IMAGE_STRUCT(v_output);
-
-    // Each work-item reads 16 bytes (8 pixels) from each of two consecutive rows.
-    // Even lanes are treated as luma, lanes 1/5/9/d as Cb and lanes 3/7/b/f as Cr.
-    const uchar16 row0    = vload16(0, src.ptr);
-    const ushort4 cb_row0 = convert_ushort4(row0.s159d);
-    const ushort4 cr_row0 = convert_ushort4(row0.s37bf);
-    vstore8(row0.even, 0, dst_y.ptr);
-
-    const uchar16 row1    = vload16(0, src.ptr + yuyv_input_stride_y);
-    const ushort4 cb_row1 = convert_ushort4(row1.s159d);
-    const ushort4 cr_row1 = convert_ushort4(row1.s37bf);
-    vstore8(row1.even, 0, dst_y.ptr + luma_output_stride_y);
-
-    // Truncating average of the two rows; the sum is formed in 16 bits so it cannot overflow.
-    const uchar4 cb_avg = convert_uchar4((cb_row0 + cb_row1) / (ushort4)(2));
-    const uchar4 cr_avg = convert_uchar4((cr_row0 + cr_row1) / (ushort4)(2));
-    vstore4(cb_avg, 0, dst_u.ptr);
-    vstore4(cr_avg, 0, dst_v.ptr);
-}
-
-/** Convert an IYUV image to RGB888
- *
- * Global Workgroup Size [ DIV_CEIL(width, 4), height ]
- * No offset.
- *
- * @param[in] luma_input_ptr Pointer to the source luma channel. Supported Format: U8
- * @param[in] luma_input_stride_x Stride of the luma image in X dimension (in bytes)
- * @param[in] luma_input_step_x luma_input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] luma_input_stride_y Stride of the source luma channel in Y dimension (in bytes)
- * @param[in] luma_input_step_y luma_input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] luma_input_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[in] u_input_ptr Pointer to the source U channel. Supported Format: U8
- * @param[in] u_input_stride_x Stride of the source image U channel in X dimension (in bytes)
- * @param[in] u_input_step_x u_input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] u_input_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] u_input_step_y u_input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] u_input_offset_first_element_in_bytes The offset of the first element in the source U channel
- * @param[in] v_input_ptr Pointer to the source V channel. Supported Format: U8
- * @param[in] v_input_stride_x Stride of the source image V channel in X dimension (in bytes)
- * @param[in] v_input_step_x v_input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] v_input_stride_y Stride of the source image V channel in Y dimension (in bytes)
- * @param[in] v_input_step_y v_input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] v_input_offset_first_element_in_bytes The offset of the first element in the source image V channel
- * @param[out] rgb_output_ptr Pointer to the destination image. Supported Format: U8
- * @param[in] rgb_output_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] rgb_output_step_x rgb_output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] rgb_output_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] rgb_output_step_y rgb_output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] rgb_output_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void IYUV_to_RGB888_bt709(
-    IMAGE_DECLARATION(luma_input),
-    IMAGE_DECLARATION(u_input),
-    IMAGE_DECLARATION(v_input),
-    IMAGE_DECLARATION(rgb_output))
-{
-    Image src_y   = CONVERT_TO_IMAGE_STRUCT(luma_input);
-    Image src_u   = CONVERT_TO_IMAGE_STRUCT(u_input);
-    Image src_v   = CONVERT_TO_IMAGE_STRUCT(v_input);
-    Image dst_rgb = CONVERT_TO_IMAGE_STRUCT(rgb_output);
-
-    // Each work-item converts a 4x2 pixel patch: four luma samples on each of two rows plus two U and two V samples.
-    const uchar4 y_top  = vload4(0, src_y.ptr);
-    const uchar4 y_bot  = vload4(0, src_y.ptr + luma_input_stride_y);
-    const uchar2 u_pair = vload2(0, src_u.ptr);
-    const uchar2 v_pair = vload2(0, src_v.ptr);
-
-    // Replicate every chroma sample across two horizontally adjacent pixels and subtract the 128 bias.
-    const char4  cb   = (char4)(u_pair.s0, u_pair.s0, u_pair.s1, u_pair.s1) - (char4)(128);
-    const char4  cr   = (char4)(v_pair.s0, v_pair.s0, v_pair.s1, v_pair.s1) - (char4)(128);
-    const float4 cb_f = convert_float4(cb);
-    const float4 cr_f = convert_float4(cr);
-
-    // Chroma contribution to R, G and B; it is shared by both rows of the patch.
-    const float4 r_off = (float4)(0.0000f) + (float4)(0.0000f) * cb_f + (float4)(1.5748f) * cr_f;
-    const float4 g_off = (float4)(0.0000f) - (float4)(0.1873f) * cb_f - (float4)(0.4681f) * cr_f;
-    const float4 b_off = (float4)(0.0000f) + (float4)(1.8556f) * cb_f + (float4)(0.0000f) * cr_f;
-
-    // Top row: add luma, saturate to uchar rounding toward zero, then pack four R,G,B triplets (8 + 4 bytes).
-    const float4 y_top_f = convert_float4(y_top);
-    const uchar4 r_top   = convert_uchar4_sat_rtz(y_top_f + r_off);
-    const uchar4 g_top   = convert_uchar4_sat_rtz(y_top_f + g_off);
-    const uchar4 b_top   = convert_uchar4_sat_rtz(y_top_f + b_off);
-    const uchar8 top_lo  = (uchar8)(r_top.s0, g_top.s0, b_top.s0, r_top.s1, g_top.s1, b_top.s1, r_top.s2, g_top.s2);
-    const uchar4 top_hi  = (uchar4)(b_top.s2, r_top.s3, g_top.s3, b_top.s3);
-    vstore8(top_lo, 0, dst_rgb.ptr);
-    vstore4(top_hi, 0, dst_rgb.ptr + 8);
-
-    // Bottom row: same conversion, written one output row stride further on.
-    const float4 y_bot_f = convert_float4(y_bot);
-    const uchar4 r_bot   = convert_uchar4_sat_rtz(y_bot_f + r_off);
-    const uchar4 g_bot   = convert_uchar4_sat_rtz(y_bot_f + g_off);
-    const uchar4 b_bot   = convert_uchar4_sat_rtz(y_bot_f + b_off);
-    const uchar8 bot_lo  = (uchar8)(r_bot.s0, g_bot.s0, b_bot.s0, r_bot.s1, g_bot.s1, b_bot.s1, r_bot.s2, g_bot.s2);
-    const uchar4 bot_hi  = (uchar4)(b_bot.s2, r_bot.s3, g_bot.s3, b_bot.s3);
-    vstore8(bot_lo, 0, dst_rgb.ptr + rgb_output_stride_y);
-    vstore4(bot_hi, 0, dst_rgb.ptr + rgb_output_stride_y + 8);
-}
-
-/** Convert an IYUV image to RGBA8888
- *
- * Global Workgroup Size [ DIV_CEIL(width, 4), height ]
- * No offset.
- *
- * @param[in] luma_input_ptr Pointer to the source luma channel. Supported Format: U8
- * @param[in] luma_input_stride_x Stride of the luma image in X dimension (in bytes)
- * @param[in] luma_input_step_x luma_input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] luma_input_stride_y Stride of the source luma channel in Y dimension (in bytes)
- * @param[in] luma_input_step_y luma_input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] luma_input_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[in] u_input_ptr Pointer to the source U channel. Supported Format: U8
- * @param[in] u_input_stride_x Stride of the source image U channel in X dimension (in bytes)
- * @param[in] u_input_step_x u_input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] u_input_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] u_input_step_y u_input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] u_input_offset_first_element_in_bytes The offset of the first element in the source U channel
- * @param[in] v_input_ptr Pointer to the source V channel. Supported Format: U8
- * @param[in] v_input_stride_x Stride of the source image V channel in X dimension (in bytes)
- * @param[in] v_input_step_x v_input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] v_input_stride_y Stride of the source image V channel in Y dimension (in bytes)
- * @param[in] v_input_step_y v_input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] v_input_offset_first_element_in_bytes The offset of the first element in the source image V channel
- * @param[out] rgba_output_ptr Pointer to the destination image. Supported Format: U8
- * @param[in] rgba_output_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] rgba_output_step_x rgba_output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] rgba_output_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] rgba_output_step_y rgba_output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] rgba_output_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void IYUV_to_RGBA8888_bt709(
-    IMAGE_DECLARATION(luma_input),
-    IMAGE_DECLARATION(u_input),
-    IMAGE_DECLARATION(v_input),
-    IMAGE_DECLARATION(rgba_output))
-{
-    Image src_y    = CONVERT_TO_IMAGE_STRUCT(luma_input);
-    Image src_u    = CONVERT_TO_IMAGE_STRUCT(u_input);
-    Image src_v    = CONVERT_TO_IMAGE_STRUCT(v_input);
-    Image dst_rgba = CONVERT_TO_IMAGE_STRUCT(rgba_output);
-
-    // Each work-item converts a 4x2 pixel patch: four luma samples on each of two rows plus two U and two V samples.
-    const uchar4 y_top  = vload4(0, src_y.ptr);
-    const uchar4 y_bot  = vload4(0, src_y.ptr + luma_input_stride_y);
-    const uchar2 u_pair = vload2(0, src_u.ptr);
-    const uchar2 v_pair = vload2(0, src_v.ptr);
-
-    // Replicate every chroma sample across two horizontally adjacent pixels and subtract the 128 bias.
-    const char4  cb   = (char4)(u_pair.s0, u_pair.s0, u_pair.s1, u_pair.s1) - (char4)(128);
-    const char4  cr   = (char4)(v_pair.s0, v_pair.s0, v_pair.s1, v_pair.s1) - (char4)(128);
-    const float4 cb_f = convert_float4(cb);
-    const float4 cr_f = convert_float4(cr);
-
-    // Chroma contribution to R, G and B; it is shared by both rows of the patch.
-    const float4 r_off = (float4)(0.0000f) + (float4)(0.0000f) * cb_f + (float4)(1.5748f) * cr_f;
-    const float4 g_off = (float4)(0.0000f) - (float4)(0.1873f) * cb_f - (float4)(0.4681f) * cr_f;
-    const float4 b_off = (float4)(0.0000f) + (float4)(1.8556f) * cb_f + (float4)(0.0000f) * cr_f;
-
-    // Top row: add luma, saturate to uchar rounding toward zero, then pack R,G,B,255 quads (two 8-byte stores).
-    const float4 y_top_f = convert_float4(y_top);
-    const uchar4 r_top   = convert_uchar4_sat_rtz(y_top_f + r_off);
-    const uchar4 g_top   = convert_uchar4_sat_rtz(y_top_f + g_off);
-    const uchar4 b_top   = convert_uchar4_sat_rtz(y_top_f + b_off);
-    const uchar8 top_lo  = (uchar8)(r_top.s0, g_top.s0, b_top.s0, 255, r_top.s1, g_top.s1, b_top.s1, 255);
-    const uchar8 top_hi  = (uchar8)(r_top.s2, g_top.s2, b_top.s2, 255, r_top.s3, g_top.s3, b_top.s3, 255);
-    vstore8(top_lo, 0, dst_rgba.ptr);
-    vstore8(top_hi, 0, dst_rgba.ptr + 8);
-
-    // Bottom row: same conversion, written one output row stride further on.
-    const float4 y_bot_f = convert_float4(y_bot);
-    const uchar4 r_bot   = convert_uchar4_sat_rtz(y_bot_f + r_off);
-    const uchar4 g_bot   = convert_uchar4_sat_rtz(y_bot_f + g_off);
-    const uchar4 b_bot   = convert_uchar4_sat_rtz(y_bot_f + b_off);
-    const uchar8 bot_lo  = (uchar8)(r_bot.s0, g_bot.s0, b_bot.s0, 255, r_bot.s1, g_bot.s1, b_bot.s1, 255);
-    const uchar8 bot_hi  = (uchar8)(r_bot.s2, g_bot.s2, b_bot.s2, 255, r_bot.s3, g_bot.s3, b_bot.s3, 255);
-    vstore8(bot_lo, 0, dst_rgba.ptr + rgba_output_stride_y);
-    vstore8(bot_hi, 0, dst_rgba.ptr + rgba_output_stride_y + 8);
-}
-
-/** Convert an IYUV image to YUV444
- *
- * Global Workgroup Size [ DIV_CEIL(width, 16), height ]
- * No offset.
- *
- * @param[in] luma_input_ptr Pointer to the source luma channel. Supported Format: U8
- * @param[in] luma_input_stride_x Stride of the luma image in X dimension (in bytes)
- * @param[in] luma_input_step_x luma_input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] luma_input_stride_y Stride of the source luma channel in Y dimension (in bytes)
- * @param[in] luma_input_step_y luma_input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] luma_input_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[in] u_input_ptr Pointer to the source U channel. Supported Format: U8
- * @param[in] u_input_stride_x Stride of the source image U channel in X dimension (in bytes)
- * @param[in] u_input_step_x u_input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] u_input_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] u_input_step_y u_input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] u_input_offset_first_element_in_bytes The offset of the first element in the source U channel
- * @param[in] v_input_ptr Pointer to the source V channel. Supported Format: U8
- * @param[in] v_input_stride_x Stride of the source image V channel in X dimension (in bytes)
- * @param[in] v_input_step_x v_input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] v_input_stride_y Stride of the source image V channel in Y dimension (in bytes)
- * @param[in] v_input_step_y v_input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] v_input_offset_first_element_in_bytes The offset of the first element in the source image V channel
- * @param[out] luma_output_ptr Pointer to the destination luma channel. Supported Format: U8
- * @param[in] luma_output_stride_x Stride of the destination luma channel in X dimension (in bytes)
- * @param[in] luma_output_step_x luma_output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] luma_output_stride_y Stride of the destination image luma channel in Y dimension (in bytes)
- * @param[in] luma_output_step_y luma_output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] luma_output_offset_first_element_in_bytes The offset of the first element in the destination luma channel
- * @param[out] u_output_ptr Pointer to the destination U channel. Supported Format: U8
- * @param[in] u_output_stride_x Stride of the destination U channel in X dimension (in bytes)
- * @param[in] u_output_step_x u_output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] u_output_stride_y Stride of the destination image U channel in Y dimension (in bytes)
- * @param[in] u_output_step_y u_output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] u_output_offset_first_element_in_bytes The offset of the first element in the destination U channel
- * @param[out] v_output_ptr Pointer to the destination V channel. Supported Format: U8
- * @param[in] v_output_stride_x Stride of the destination V channel in X dimension (in bytes)
- * @param[in] v_output_step_x v_output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] v_output_stride_y Stride of the destination V channel in Y dimension (in bytes)
- * @param[in] v_output_step_y v_output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] v_output_offset_first_element_in_bytes The offset of the first element in the destination V channel
- *
- */
-__kernel void IYUV_to_YUV444_bt709(
-    IMAGE_DECLARATION(luma_input),
-    IMAGE_DECLARATION(u_input),
-    IMAGE_DECLARATION(v_input),
-    IMAGE_DECLARATION(luma_output),
-    IMAGE_DECLARATION(u_output),
-    IMAGE_DECLARATION(v_output))
-{
-    Image src_y = CONVERT_TO_IMAGE_STRUCT(luma_input);
-    Image src_u = CONVERT_TO_IMAGE_STRUCT(u_input);
-    Image src_v = CONVERT_TO_IMAGE_STRUCT(v_input);
-    Image dst_y = CONVERT_TO_IMAGE_STRUCT(luma_output);
-    Image dst_u = CONVERT_TO_IMAGE_STRUCT(u_output);
-    Image dst_v = CONVERT_TO_IMAGE_STRUCT(v_output);
-
-    // Each work-item handles a 16x2 pixel patch: two rows of 16 luma bytes plus 8 U and 8 V bytes.
-    const uchar16 y_top = vload16(0, src_y.ptr);
-    const uchar16 y_bot = vload16(0, src_y.ptr + luma_input_stride_y);
-    const uchar8  u_sub = vload8(0, src_u.ptr);
-    const uchar8  v_sub = vload8(0, src_v.ptr);
-
-    // Horizontal upsampling: every chroma byte is repeated twice to form a 16-byte row.
-    const uchar16 u_full = u_sub.s0011223344556677;
-    const uchar16 v_full = v_sub.s0011223344556677;
-    // Luma is copied through; each upsampled chroma row is written to two consecutive output rows.
-    vstore16(y_top, 0, dst_y.ptr);
-    vstore16(y_bot, 0, dst_y.ptr + luma_output_stride_y);
-    vstore16(u_full, 0, dst_u.ptr);
-    vstore16(u_full, 0, dst_u.ptr + u_output_stride_y);
-    vstore16(v_full, 0, dst_v.ptr);
-    vstore16(v_full, 0, dst_v.ptr + v_output_stride_y);
-}
-
-/** Convert an IYUV image to NV12
- *
- * Global Workgroup Size [ DIV_CEIL(width, 16), height ]
- * No offset.
- *
- * @param[in] luma_input_ptr Pointer to the source luma channel. Supported Format: U8
- * @param[in] luma_input_stride_x Stride of the luma image in X dimension (in bytes)
- * @param[in] luma_input_step_x luma_input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] luma_input_stride_y Stride of the source luma channel in Y dimension (in bytes)
- * @param[in] luma_input_step_y luma_input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] luma_input_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[in] u_input_ptr Pointer to the source U channel. Supported Format: U8
- * @param[in] u_input_stride_x Stride of the source image U channel in X dimension (in bytes)
- * @param[in] u_input_step_x u_input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] u_input_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] u_input_step_y u_input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] u_input_offset_first_element_in_bytes The offset of the first element in the source U channel
- * @param[in] v_input_ptr Pointer to the source V channel. Supported Format: U8
- * @param[in] v_input_stride_x Stride of the source image V channel in X dimension (in bytes)
- * @param[in] v_input_step_x v_input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] v_input_stride_y Stride of the source image V channel in Y dimension (in bytes)
- * @param[in] v_input_step_y v_input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] v_input_offset_first_element_in_bytes The offset of the first element in the source image V channel
- * @param[out] luma_output_ptr Pointer to the destination luma channel. Supported Format: U8
- * @param[in] luma_output_stride_x Stride of the destination luma channel in X dimension (in bytes)
- * @param[in] luma_output_step_x luma_output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] luma_output_stride_y Stride of the destination image luma channel in Y dimension (in bytes)
- * @param[in] luma_output_step_y luma_output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] luma_output_offset_first_element_in_bytes The offset of the first element in the destination luma channel
- * @param[out] uv_output_ptr Pointer to the destination UV channel. Supported Format: U8
- * @param[in] uv_output_stride_x Stride of the destination UV channel in X dimension (in bytes)
- * @param[in] uv_output_step_x uv_output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] uv_output_stride_y Stride of the destination image UV channel in Y dimension (in bytes)
- * @param[in] uv_output_step_y uv_output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] uv_output_offset_first_element_in_bytes The offset of the first element in the destination UV channel
- *
- */
-__kernel void IYUV_to_NV12_bt709(
-    IMAGE_DECLARATION(luma_input),
-    IMAGE_DECLARATION(u_input),
-    IMAGE_DECLARATION(v_input),
-    IMAGE_DECLARATION(luma_output),
-    IMAGE_DECLARATION(uv_output))
-{
-    Image src_y  = CONVERT_TO_IMAGE_STRUCT(luma_input);
-    Image src_u  = CONVERT_TO_IMAGE_STRUCT(u_input);
-    Image src_v  = CONVERT_TO_IMAGE_STRUCT(v_input);
-    Image dst_y  = CONVERT_TO_IMAGE_STRUCT(luma_output);
-    Image dst_uv = CONVERT_TO_IMAGE_STRUCT(uv_output);
-
-    // Each work-item handles a 16x2 pixel patch: two rows of 16 luma bytes plus 8 U and 8 V bytes.
-    const uchar16 y_top = vload16(0, src_y.ptr);
-    const uchar16 y_bot = vload16(0, src_y.ptr + luma_input_stride_y);
-    const uchar8  u_sub = vload8(0, src_u.ptr);
-    const uchar8  v_sub = vload8(0, src_v.ptr);
-    // Concatenate U (lanes 0-7) and V (lanes 8-f), then shuffle into U0,V0,U1,V1,... order.
-    const uchar16 planar      = (uchar16)(u_sub, v_sub);
-    const uchar16 interleaved = planar.s08192a3b4c5d6e7f;
-    vstore16(y_top, 0, dst_y.ptr);
-    vstore16(y_bot, 0, dst_y.ptr + luma_output_stride_y);
-    vstore16(interleaved, 0, dst_uv.ptr);
-}
-
-/** Convert a YUYV image to NV12 using BT709 color space
- *
- * Global Workgroup Size [ DIV_CEIL(width, 8), height ]
- * No offset.
- *
- * @param[in] yuyv_input_ptr Pointer to the source image. Supported Format: U8
- * @param[in] yuyv_input_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] yuyv_input_step_x yuyv_input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] yuyv_input_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] yuyv_input_step_y yuyv_input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] yuyv_input_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] luma_output_ptr Pointer to the destination luma channel. Supported Format: U8
- * @param[in] luma_output_stride_x Stride of the destination luma channel in X dimension (in bytes)
- * @param[in] luma_output_step_x luma_output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] luma_output_stride_y Stride of the destination image luma channel in Y dimension (in bytes)
- * @param[in] luma_output_step_y luma_output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] luma_output_offset_first_element_in_bytes The offset of the first element in the destination luma channel
- * @param[out] uv_output_ptr Pointer to the destination UV channel. Supported Format: U8
- * @param[in] uv_output_stride_x Stride of the destination UV channel in X dimension (in bytes)
- * @param[in] uv_output_step_x uv_output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] uv_output_stride_y Stride of the destination image UV channel in Y dimension (in bytes)
- * @param[in] uv_output_step_y uv_output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] uv_output_offset_first_element_in_bytes The offset of the first element in the destination UV channel
- *
- */
-__kernel void YUYV422_to_NV12_bt709(
-    IMAGE_DECLARATION(yuyv_input),
-    IMAGE_DECLARATION(luma_output),
-    IMAGE_DECLARATION(uv_output))
-{
-    Image src    = CONVERT_TO_IMAGE_STRUCT(yuyv_input);
-    Image dst_y  = CONVERT_TO_IMAGE_STRUCT(luma_output);
-    Image dst_uv = CONVERT_TO_IMAGE_STRUCT(uv_output);
-
-    // Each work-item reads 16 bytes (8 pixels) from each of two consecutive rows.
-    // Even lanes are written out as luma; odd lanes carry the chroma bytes, which stay interleaved.
-    const uchar16 row0    = vload16(0, src.ptr);
-    const ushort8 uv_row0 = convert_ushort8(row0.odd);
-    vstore8(row0.even, 0, dst_y.ptr);
-
-    const uchar16 row1    = vload16(0, src.ptr + yuyv_input_stride_y);
-    const ushort8 uv_row1 = convert_ushort8(row1.odd);
-    vstore8(row1.even, 0, dst_y.ptr + luma_output_stride_y);
-
-    // Truncating average of the two rows; the sum is formed in 16 bits so it cannot overflow.
-    const uchar8 uv_avg = convert_uchar8((uv_row0 + uv_row1) / (ushort8)(2));
-    vstore8(uv_avg, 0, dst_uv.ptr);
-}
-
-/** Convert a UYVY image to NV12 using BT709 color space
- *
- * Global Workgroup Size [ DIV_CEIL(width, 4), height ]
- * No offset.
- *
- * @param[in] input_uyvy_ptr Pointer to the source image. Supported Format: U8
- * @param[in] input_uyvy_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] input_uyvy_step_x input_uyvy_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input_uyvy_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] input_uyvy_step_y input_uyvy_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] input_uyvy_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] luma_ptr Pointer to the destination luma channel. Supported Format: U8
- * @param[in] luma_stride_x Stride of the destination luma channel in X dimension (in bytes)
- * @param[in] luma_step_x luma_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] luma_stride_y Stride of the destination image luma channel in Y dimension (in bytes)
- * @param[in] luma_step_y luma_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] luma_offset_first_element_in_bytes The offset of the first element in the destination image luma channel
- * @param[out] uv_ptr Pointer to the destination uv channel. Supported Format: U8
- * @param[in] uv_stride_x Stride of the destination uv channel in X dimension (in bytes)
- * @param[in] uv_step_x uv_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] uv_stride_y Stride of the destination image uv channel in Y dimension (in bytes)
- * @param[in] uv_step_y uv_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] uv_offset_first_element_in_bytes The offset of the first element in the destination image uv channel
- *
- */
-__kernel void UYVY422_to_NV12_bt709(
-    IMAGE_DECLARATION(input_uyvy),
-    IMAGE_DECLARATION(luma),
-    IMAGE_DECLARATION(uv))
-{
-    Image src    = CONVERT_TO_IMAGE_STRUCT(input_uyvy);
-    Image dst_y  = CONVERT_TO_IMAGE_STRUCT(luma);
-    Image dst_uv = CONVERT_TO_IMAGE_STRUCT(uv);
-
-    // Each work-item reads 16 bytes (8 pixels) from each of two consecutive rows; odd lanes are written out as luma.
-    const uchar16 top = vload16(0, src.ptr);
-    vstore8(top.odd, 0, dst_y.ptr);
-
-    const uchar16 bottom = vload16(0, src.ptr + input_uyvy_stride_y);
-    vstore8(bottom.odd, 0, dst_y.ptr + luma_stride_y);
-
-    // Even lanes carry the interleaved chroma; average the two rows in 16 bits (truncating) and store 8 bytes.
-    const ushort8 uv_sum = convert_ushort8(top.even) + convert_ushort8(bottom.even);
-    const uchar8  uv_avg = convert_uchar8(uv_sum / (ushort8)(2));
-    vstore8(uv_avg, 0, dst_uv.ptr);
-}
diff --git a/src/core/CL/cl_kernels/convolution3x3.cl b/src/core/CL/cl_kernels/convolution3x3.cl
deleted file mode 100644
index 7bca567b11..0000000000
--- a/src/core/CL/cl_kernels/convolution3x3.cl
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * Copyright (c) 2016-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "helpers.h"
-
-#ifndef DATA_TYPE
-#define DATA_TYPE short
-#endif /* DATA_TYPE */
-
-#ifndef DATA_TYPE_OUT
-#define DATA_TYPE_OUT uchar
-#endif /* DATA_TYPE_OUT */
-
-/** Compute a 1D horizontal convolution of size 3 for 8 bytes assuming the input is made of 1 channel of 1 byte (i.e 8 pixels).
- *
- * @param[in] left_pixel Pointer to the left pixel.
- * @param[in] left_coeff Weight of the left pixel
- * @param[in] middle_coeff Weight of the middle pixel
- * @param[in] right_coeff Weight of the right pixel
- *
- * @return a short8 containing 8 convoluted values.
- */
-inline VEC_DATA_TYPE(DATA_TYPE, 8) convolution1x3(__global const uchar *left_pixel,
- const short left_coeff,
- const short middle_coeff,
- const short right_coeff)
-{
- uchar16 temp = vload16(0, left_pixel);
- VEC_DATA_TYPE(DATA_TYPE, 8)
- left = CONVERT(temp.s01234567, VEC_DATA_TYPE(DATA_TYPE, 8));
- VEC_DATA_TYPE(DATA_TYPE, 8)
- middle = CONVERT(temp.s12345678, VEC_DATA_TYPE(DATA_TYPE, 8));
- VEC_DATA_TYPE(DATA_TYPE, 8)
- right = CONVERT(temp.s23456789, VEC_DATA_TYPE(DATA_TYPE, 8));
-
- return left * (VEC_DATA_TYPE(DATA_TYPE, 8))left_coeff + middle * (VEC_DATA_TYPE(DATA_TYPE, 8))middle_coeff + right * (VEC_DATA_TYPE(DATA_TYPE, 8))right_coeff;
-}
-
-/** Apply a 3x3 convolution matrix to a single channel U8 input image and return the result.
- *
- * Convolution matrix layout:
- *
- * [ mat0, mat1, mat2 ]\n
- * [ mat3, mat4, mat5 ]\n
- * [ mat6, mat7, mat8 ]\n
- *
- * @param[in] src A pointer to source Image structure
- * @param[in] mat0 Coefficient from the convolution matrix
- * @param[in] mat1 Coefficient from the convolution matrix
- * @param[in] mat2 Coefficient from the convolution matrix
- * @param[in] mat3 Coefficient from the convolution matrix
- * @param[in] mat4 Coefficient from the convolution matrix
- * @param[in] mat5 Coefficient from the convolution matrix
- * @param[in] mat6 Coefficient from the convolution matrix
- * @param[in] mat7 Coefficient from the convolution matrix
- * @param[in] mat8 Coefficient from the convolution matrix
- * @param[in] scale Convolution matrix scale (Sum of the coefficients, or 1 if the sum is 0)
- *
- * @return a short8 containing 8 convoluted and scaled values.
- */
-inline VEC_DATA_TYPE(DATA_TYPE, 8) convolution3x3(
- Image *src,
- const short mat0, const short mat1, const short mat2,
- const short mat3, const short mat4, const short mat5,
- const short mat6, const short mat7, const short mat8, uint scale)
-{
- // Output pixels
- VEC_DATA_TYPE(DATA_TYPE, 8)
- pixels;
-
- // Row 0
- pixels = convolution1x3(offset(src, -1, -1), mat0, mat1, mat2);
- // Row
- pixels += convolution1x3(offset(src, -1, 0), mat3, mat4, mat5);
- // Row 2
- pixels += convolution1x3(offset(src, -1, 1), mat6, mat7, mat8);
-
- // Divide by the scale
- return pixels / (VEC_DATA_TYPE(DATA_TYPE, 8))scale;
-}
-
-#ifndef DYNAMIC_MATRIX_CONVOLUTION
-
-/** Apply a 3x3 static convolution matrix to a single channel U8 input image and output a single channel image.
- *
- * @attention The matrix coefficients(MAT0, MAT1, ... MAT8, SCALE), DATA_TYPE, and DATA_TYPE_OUT need to be passed at compile time.\n
- * e.g. -DMAT0=1 -DMAT2=2, ...-DMAT8=8, -DSCALE=1, -DDATA_TYPE=int, -DDATA_TYPE_OUT=int
- *
- * @param[in] src_ptr Pointer to the source image
- * @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] dst_ptr Pointer to the destination image. Supported data types: U8, S16
- * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void convolution3x3_static(
- IMAGE_DECLARATION(src),
- IMAGE_DECLARATION(dst))
-{
- Image src = CONVERT_TO_IMAGE_STRUCT(src);
- Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
-
- VEC_DATA_TYPE(DATA_TYPE, 8)
- pixels = convolution3x3(&src,
- MAT0, MAT1, MAT2, MAT3, MAT4, MAT5, MAT6, MAT7, MAT8, SCALE);
-
- // Store the result as is in dst
- vstore8(CONVERT_SAT(pixels, VEC_DATA_TYPE(DATA_TYPE_OUT, 8)), 0, (__global DATA_TYPE_OUT *)dst.ptr);
-}
-
-#endif // DYNAMIC_MATRIX_CONVOLUTION
diff --git a/src/core/CL/cl_kernels/convolution5x5.cl b/src/core/CL/cl_kernels/convolution5x5.cl
deleted file mode 100644
index 9995ebfa90..0000000000
--- a/src/core/CL/cl_kernels/convolution5x5.cl
+++ /dev/null
@@ -1,287 +0,0 @@
-/*
- * Copyright (c) 2016-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "helpers.h"
-
-#ifndef DATA_TYPE
-#define DATA_TYPE short
-#endif /* DATA_TYPE */
-
-#ifndef COMPUTE_TYPE
-#define COMPUTE_TYPE int
-#endif /* COMPUTE_TYPE */
-
-#ifndef DATA_TYPE_OUT
-#define DATA_TYPE_OUT uchar
-#endif /* DATA_TYPE_OUT */
-
-/** Compute a 1D horizontal convolution of size 5 for 8 bytes assuming the input is made of 1 channel of 1 byte (i.e 8 pixels).
- *
- * @param[in] left_pixel Pointer to the left pixel
- * @param[in] left1_coeff Weight of the most left pixel
- * @param[in] left2_coeff Weight of the left pixel
- * @param[in] middle_coeff Weight of the middle pixel
- * @param[in] right1_coeff Weight of the right pixel
- * @param[in] right2_coeff Weight of the most right pixel
- *
- * @return a short8 containing 8 convoluted values.
- */
-VEC_DATA_TYPE(DATA_TYPE, 8)
-convolution1x5(
- __global const uchar *left_pixel,
- const short left1_coeff,
- const short left2_coeff,
- const short middle_coeff,
- const short right1_coeff,
- const short right2_coeff)
-{
- uchar16 temp = vload16(0, left_pixel);
-
- VEC_DATA_TYPE(DATA_TYPE, 8)
- left1 = CONVERT(temp.s01234567, VEC_DATA_TYPE(DATA_TYPE, 8));
- VEC_DATA_TYPE(DATA_TYPE, 8)
- left2 = CONVERT(temp.s12345678, VEC_DATA_TYPE(DATA_TYPE, 8));
- VEC_DATA_TYPE(DATA_TYPE, 8)
- middle = CONVERT(temp.s23456789, VEC_DATA_TYPE(DATA_TYPE, 8));
- VEC_DATA_TYPE(DATA_TYPE, 8)
- right1 = CONVERT(temp.s3456789a, VEC_DATA_TYPE(DATA_TYPE, 8));
- VEC_DATA_TYPE(DATA_TYPE, 8)
- right2 = CONVERT(temp.s456789ab, VEC_DATA_TYPE(DATA_TYPE, 8));
-
- return left1 * (VEC_DATA_TYPE(DATA_TYPE, 8))left1_coeff + left2 * (VEC_DATA_TYPE(DATA_TYPE, 8))left2_coeff
- + middle * (VEC_DATA_TYPE(DATA_TYPE, 8))middle_coeff + right1 * (VEC_DATA_TYPE(DATA_TYPE, 8))right1_coeff + right2 * (VEC_DATA_TYPE(DATA_TYPE, 8))right2_coeff;
-}
-
-/** Compute a 1D vertical convolution of size 5 for 8 bytes assuming the input is made of 1 channel of 1 byte (i.e 8 pixels).
- *
- * @param[in] src Pointer to source image.
- * @param[in] up1_coeff Weight of the most up pixel
- * @param[in] up2_coeff Weight of the up pixel
- * @param[in] middle_coeff Weight of the middle pixel
- * @param[in] down1_coeff Weight of the down pixel
- * @param[in] down2_coeff Weight of the most down pixel
- *
- * @return a short8 containing 8 convoluted values.
- */
-VEC_DATA_TYPE(COMPUTE_TYPE, 8)
-convolution5x1(
- Image *src,
- const short up1_coeff,
- const short up2_coeff,
- const short middle_coeff,
- const short down1_coeff,
- const short down2_coeff)
-{
- VEC_DATA_TYPE(COMPUTE_TYPE, 8)
- val;
- VEC_DATA_TYPE(COMPUTE_TYPE, 8)
- out = (VEC_DATA_TYPE(COMPUTE_TYPE, 8))0;
-
- val = CONVERT(vload8(0, (__global DATA_TYPE *)offset(src, 0, -2)), VEC_DATA_TYPE(COMPUTE_TYPE, 8));
- out += val * (VEC_DATA_TYPE(COMPUTE_TYPE, 8))up1_coeff;
-
- val = CONVERT(vload8(0, (__global DATA_TYPE *)offset(src, 0, -1)), VEC_DATA_TYPE(COMPUTE_TYPE, 8));
- out += val * (VEC_DATA_TYPE(COMPUTE_TYPE, 8))up2_coeff;
-
- val = CONVERT(vload8(0, (__global DATA_TYPE *)offset(src, 0, 0)), VEC_DATA_TYPE(COMPUTE_TYPE, 8));
- out += val * (VEC_DATA_TYPE(COMPUTE_TYPE, 8))middle_coeff;
-
- val = CONVERT(vload8(0, (__global DATA_TYPE *)offset(src, 0, 1)), VEC_DATA_TYPE(COMPUTE_TYPE, 8));
- out += val * (VEC_DATA_TYPE(COMPUTE_TYPE, 8))down1_coeff;
-
- val = CONVERT(vload8(0, (__global DATA_TYPE *)offset(src, 0, 2)), VEC_DATA_TYPE(COMPUTE_TYPE, 8));
- out += val * (VEC_DATA_TYPE(COMPUTE_TYPE, 8))down2_coeff;
-
- return out;
-}
-
-/** Apply a 5x5 convolution matrix to a single channel U8 input image and return the result.
- *
- * Convolution matrix layout:\n
- * [ mat0, mat1, mat2, mat3 , mat4 ]\n
- * [ mat5, mat6, mat7, mat8, mat9 ]\n
- * [ mat10, mat11, mat12, mat13, mat14 ]\n
- * [ mat15, mat16, mat17, mat18, mat19 ]\n
- * [ mat20, mat21, mat22, mat23, mat24 ]
- *
- * @param[in] src A pointer to source Image structure.
- * @param[in] mat0 Coefficient from the convolution matrix
- * @param[in] mat1 Coefficient from the convolution matrix
- * @param[in] mat2 Coefficient from the convolution matrix
- * @param[in] mat3 Coefficient from the convolution matrix
- * @param[in] mat4 Coefficient from the convolution matrix
- * @param[in] mat5 Coefficient from the convolution matrix
- * @param[in] mat6 Coefficient from the convolution matrix
- * @param[in] mat7 Coefficient from the convolution matrix
- * @param[in] mat8 Coefficient from the convolution matrix
- * @param[in] mat9 Coefficient from the convolution matrix
- * @param[in] mat10 Coefficient from the convolution matrix
- * @param[in] mat11 Coefficient from the convolution matrix
- * @param[in] mat12 Coefficient from the convolution matrix
- * @param[in] mat13 Coefficient from the convolution matrix
- * @param[in] mat14 Coefficient from the convolution matrix
- * @param[in] mat15 Coefficient from the convolution matrix
- * @param[in] mat16 Coefficient from the convolution matrix
- * @param[in] mat17 Coefficient from the convolution matrix
- * @param[in] mat18 Coefficient from the convolution matrix
- * @param[in] mat19 Coefficient from the convolution matrix
- * @param[in] mat20 Coefficient from the convolution matrix
- * @param[in] mat21 Coefficient from the convolution matrix
- * @param[in] mat22 Coefficient from the convolution matrix
- * @param[in] mat23 Coefficient from the convolution matrix
- * @param[in] mat24 Coefficient from the convolution matrix
- * @param[in] scale Convolution matrix scale (Sum of the coefficients, or 1 if the sum is 0)
- *
- * @return a short8 containing 8 convoluted and scaled values.
- */
-short8 convolution5x5(
- Image *src,
- const short mat0, const short mat1, const short mat2, const short mat3, const short mat4,
- const short mat5, const short mat6, const short mat7, const short mat8, const short mat9,
- const short mat10, const short mat11, const short mat12, const short mat13, const short mat14,
- const short mat15, const short mat16, const short mat17, const short mat18, const short mat19,
- const short mat20, const short mat21, const short mat22, const short mat23, const short mat24,
- uint scale)
-{
- VEC_DATA_TYPE(DATA_TYPE, 8)
- pixels;
-
- pixels = convolution1x5(offset(src, -2, -2), mat0, mat1, mat2, mat3, mat4);
- pixels += convolution1x5(offset(src, -2, -1), mat5, mat6, mat7, mat8, mat9);
- pixels += convolution1x5(offset(src, -2, 0), mat10, mat11, mat12, mat13, mat14);
- pixels += convolution1x5(offset(src, -2, 1), mat15, mat16, mat17, mat18, mat19);
- pixels += convolution1x5(offset(src, -2, 2), mat20, mat21, mat22, mat23, mat24);
-
- if(scale > 0)
- {
- pixels /= (VEC_DATA_TYPE(DATA_TYPE, 8))scale;
- }
-
- return convert_short8_sat(pixels);
-}
-
-#ifndef DYNAMIC_MATRIX_CONVOLUTION
-
-/** Apply a 1x5 static convolution matrix to a single channel U8 input image and output a single temporary channel image(Support U16, S16, S32).
- *
- * @attention The matrix coefficients (MAT0, MAT1, MAT2, MAT3, MAT4) and DATA_TYPE need to be passed at compile time:\n
- * e.g. -DMAT0=1 -DMAT2=2, -DMAT3=3, -DMAT4=4, -DDATA_TYPE=int
- *
- * @param[in] src_ptr Pointer to the source image. Supported data types: U8
- * @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] dst_ptr Pointer to the destination image. Supported data types: U16, S16, S32
- * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void convolution_separable1x5_static(
- IMAGE_DECLARATION(src),
- IMAGE_DECLARATION(dst))
-{
- Image src = CONVERT_TO_IMAGE_STRUCT(src);
- Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
-
- // Output pixels
- VEC_DATA_TYPE(DATA_TYPE, 8)
- pixels = convolution1x5(offset(&src, -2, 0), MAT0, MAT1, MAT2, MAT3, MAT4);
-
- // Store result in dst
- vstore8(pixels, 0, (__global DATA_TYPE *)dst.ptr);
-}
-
-/** Apply a 5x1 static convolution matrix to a single channel U8 input image and output a single channel image.
- *
- * @attention The matrix coefficients (MAT5, MAT6, MAT7, MAT8, MAT9, SCALE), COMPUTE_TYPE and DATA_TYPE_OUT need to be passed at compile time:\n
- * e.g. -DMAT5=1 -DMAT6=2, -DMAT7=3, -DMAT8=4, -DMAT9=5, -DSCALE=6, -DCOMPUTE_TYPE=int, -DDATA_TYPE_OUT=int
- *
- * @param[in] src_ptr Pointer to the source image. Supported data types: U16, S16, S32
- * @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] dst_ptr Pointer to the destination image. Supported data types: U8, S16
- * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void convolution_separable5x1_static(
- IMAGE_DECLARATION(src),
- IMAGE_DECLARATION(dst))
-{
- Image src = CONVERT_TO_IMAGE_STRUCT(src);
- Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
-
- // Output pixels
- VEC_DATA_TYPE(COMPUTE_TYPE, 8)
- pixels = convolution5x1(&src, MAT5, MAT6, MAT7, MAT8, MAT9);
-
- // Divide by the scale
- pixels /= (VEC_DATA_TYPE(COMPUTE_TYPE, 8))SCALE;
-
- // Store result in dst
- vstore8(CONVERT_SAT(pixels, VEC_DATA_TYPE(DATA_TYPE_OUT, 8)), 0, (__global DATA_TYPE_OUT *)dst.ptr);
-}
-
-/** Apply a static 5x5 convolution matrix to a single channel U8 input image and output a single channel image including borders
- *
- * @attention The matrix coefficients(MAT0, MAT1, ... MAT24, SCALE), DATA_TYPE_OUT need to be passed at compile time:\n
- * e.g. -DMAT0=1 -DMAT1=2, ... -DMAT24=24, -DSCALE=6, -DDATA_TYPE_OUT=int
- *
- * @param[in] src_ptr Pointer to the source image. Supported data types: U8
- * @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] dst_ptr Pointer to the destination image. Supported data types: U8, S16
- * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void convolution5x5_static(
- IMAGE_DECLARATION(src),
- IMAGE_DECLARATION(dst))
-{
- Image src = CONVERT_TO_IMAGE_STRUCT(src);
- Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
-
- short8 pixels = convolution5x5(&src,
- MAT0, MAT1, MAT2, MAT3, MAT4, MAT5, MAT6, MAT7, MAT8, MAT9, MAT10, MAT11, MAT12, MAT13,
- MAT14, MAT15, MAT16, MAT17, MAT18, MAT19, MAT20, MAT21, MAT22, MAT23, MAT24, SCALE);
-
- // Store the result as is in dst
- vstore8(CONVERT_SAT(pixels, VEC_DATA_TYPE(DATA_TYPE_OUT, 8)), 0, (__global DATA_TYPE_OUT *)dst.ptr);
-}
-
-#endif // DYNAMIC_MATRIX_CONVOLUTION
diff --git a/src/core/CL/cl_kernels/convolution7x7.cl b/src/core/CL/cl_kernels/convolution7x7.cl
deleted file mode 100644
index 50fb3d7f35..0000000000
--- a/src/core/CL/cl_kernels/convolution7x7.cl
+++ /dev/null
@@ -1,338 +0,0 @@
-/*
- * Copyright (c) 2016-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "helpers.h"
-
-#ifndef DATA_TYPE
-#define DATA_TYPE short
-#endif /* DATA_TYPE */
-
-#ifndef COMPUTE_TYPE
-#define COMPUTE_TYPE int
-#endif /* COMPUTE_TYPE */
-
-#ifndef DATA_TYPE_OUT
-#define DATA_TYPE_OUT uchar
-#endif /* DATA_TYPE_OUT */
-
-/** Compute a 1D horizontal convolution of size 7 for 8 bytes assuming the input is made of 1 channel of 1 byte (i.e 8 pixels).
- *
- * @param[in] left_pixel Pointer to the left pixel
- * @param[in] left1_coeff Weight of the most left pixel
- * @param[in] left2_coeff Weight of the second left pixel
- * @param[in] left3_coeff Weight of the left pixel
- * @param[in] middle_coeff Weight of the middle pixel
- * @param[in] right1_coeff Weight of the right pixel
- * @param[in] right2_coeff Weight of the second right pixel
- * @param[in] right3_coeff Weight of the most right pixel
- *
- * @return a short8 containing 8 convoluted values.
- */
-VEC_DATA_TYPE(DATA_TYPE, 8)
-convolution1x7(
- __global const uchar *left_pixel,
- const short left1_coeff,
- const short left2_coeff,
- const short left3_coeff,
- const short middle_coeff,
- const short right1_coeff,
- const short right2_coeff,
- const short right3_coeff)
-{
- uchar16 temp = vload16(0, left_pixel);
-
- VEC_DATA_TYPE(DATA_TYPE, 8)
- left1 = CONVERT(temp.s01234567, VEC_DATA_TYPE(DATA_TYPE, 8));
- VEC_DATA_TYPE(DATA_TYPE, 8)
- left2 = CONVERT(temp.s12345678, VEC_DATA_TYPE(DATA_TYPE, 8));
- VEC_DATA_TYPE(DATA_TYPE, 8)
- left3 = CONVERT(temp.s23456789, VEC_DATA_TYPE(DATA_TYPE, 8));
- VEC_DATA_TYPE(DATA_TYPE, 8)
- middle = CONVERT(temp.s3456789a, VEC_DATA_TYPE(DATA_TYPE, 8));
- VEC_DATA_TYPE(DATA_TYPE, 8)
- right1 = CONVERT(temp.s456789ab, VEC_DATA_TYPE(DATA_TYPE, 8));
- VEC_DATA_TYPE(DATA_TYPE, 8)
- right2 = CONVERT(temp.s56789abc, VEC_DATA_TYPE(DATA_TYPE, 8));
- VEC_DATA_TYPE(DATA_TYPE, 8)
- right3 = CONVERT(temp.s6789abcd, VEC_DATA_TYPE(DATA_TYPE, 8));
-
- return left1 * (VEC_DATA_TYPE(DATA_TYPE, 8))left1_coeff + left2 * (VEC_DATA_TYPE(DATA_TYPE, 8))left2_coeff + left3 * (VEC_DATA_TYPE(DATA_TYPE, 8))left3_coeff + middle * (VEC_DATA_TYPE(DATA_TYPE,
- 8))middle_coeff + right1 * (VEC_DATA_TYPE(DATA_TYPE, 8))right1_coeff + right2 * (VEC_DATA_TYPE(DATA_TYPE, 8))right2_coeff + right3 * (VEC_DATA_TYPE(DATA_TYPE, 8))right3_coeff;
-}
-
-/** Compute a 1D vertical convolution of size 7 for 8 bytes assuming the input is made of 1 channel of 1 byte (i.e 8 pixels).
- *
- * @param[in] src Pointer to source image.
- * @param[in] up1_coeff Weight of the most up pixel
- * @param[in] up2_coeff Weight of the second up pixel
- * @param[in] up3_coeff Weight of the up pixel
- * @param[in] middle_coeff Weight of the middle pixel
- * @param[in] down1_coeff Weight of the down pixel
- * @param[in] down2_coeff Weight of the second down pixel
- * @param[in] down3_coeff Weight of the third down pixel
- *
- * @return a short8 containing 8 convoluted values.
- */
-VEC_DATA_TYPE(COMPUTE_TYPE, 8)
-convolution7x1(
- Image *src,
- const short up1_coeff,
- const short up2_coeff,
- const short up3_coeff,
- const short middle_coeff,
- const short down1_coeff,
- const short down2_coeff,
- const short down3_coeff)
-{
- VEC_DATA_TYPE(COMPUTE_TYPE, 8)
- val;
- VEC_DATA_TYPE(COMPUTE_TYPE, 8)
- out = (VEC_DATA_TYPE(COMPUTE_TYPE, 8))0;
-
- val = CONVERT(vload8(0, (__global DATA_TYPE *)offset(src, 0, -3)), VEC_DATA_TYPE(COMPUTE_TYPE, 8));
- out += val * (VEC_DATA_TYPE(COMPUTE_TYPE, 8))up1_coeff;
-
- val = CONVERT(vload8(0, (__global DATA_TYPE *)offset(src, 0, -2)), VEC_DATA_TYPE(COMPUTE_TYPE, 8));
- out += val * (VEC_DATA_TYPE(COMPUTE_TYPE, 8))up2_coeff;
-
- val = CONVERT(vload8(0, (__global DATA_TYPE *)offset(src, 0, -1)), VEC_DATA_TYPE(COMPUTE_TYPE, 8));
- out += val * (VEC_DATA_TYPE(COMPUTE_TYPE, 8))up3_coeff;
-
- val = CONVERT(vload8(0, (__global DATA_TYPE *)offset(src, 0, 0)), VEC_DATA_TYPE(COMPUTE_TYPE, 8));
- out += val * (VEC_DATA_TYPE(COMPUTE_TYPE, 8))middle_coeff;
-
- val = CONVERT(vload8(0, (__global DATA_TYPE *)offset(src, 0, 1)), VEC_DATA_TYPE(COMPUTE_TYPE, 8));
- out += val * (VEC_DATA_TYPE(COMPUTE_TYPE, 8))down1_coeff;
-
- val = CONVERT(vload8(0, (__global DATA_TYPE *)offset(src, 0, 2)), VEC_DATA_TYPE(COMPUTE_TYPE, 8));
- out += val * (VEC_DATA_TYPE(COMPUTE_TYPE, 8))down2_coeff;
-
- val = CONVERT(vload8(0, (__global DATA_TYPE *)offset(src, 0, 3)), VEC_DATA_TYPE(COMPUTE_TYPE, 8));
- out += val * (VEC_DATA_TYPE(COMPUTE_TYPE, 8))down3_coeff;
-
- return out;
-}
-
-/** Apply a 7x7 convolution matrix to a single channel U8 input image and return the result.
- *
- * Convolution matrix layout:\n
- * [ mat0, mat1, mat2, mat3 , mat4, mat5, mat6 ]\n
- * [ mat7, mat8, mat9, mat10, mat11, mat12, mat13 ]\n
- * [ mat14, mat15, mat16, mat17, mat18, mat19, mat20 ]\n
- * [ mat21, mat22, mat23, mat24, mat25, mat26, mat27 ]\n
- * [ mat28, mat29, mat30, mat31, mat32, mat33, mat34 ]\n
- * [ mat35, mat36, mat37, mat38, mat39, mat40, mat41 ]\n
- * [ mat42, mat43, mat44, mat45, mat46, mat47, mat48 ]
- *
- * @param[in] src A pointer to source Image structure.
- * @param[in] mat0 Coefficient from the convolution matrix
- * @param[in] mat1 Coefficient from the convolution matrix
- * @param[in] mat2 Coefficient from the convolution matrix
- * @param[in] mat3 Coefficient from the convolution matrix
- * @param[in] mat4 Coefficient from the convolution matrix
- * @param[in] mat5 Coefficient from the convolution matrix
- * @param[in] mat6 Coefficient from the convolution matrix
- * @param[in] mat7 Coefficient from the convolution matrix
- * @param[in] mat8 Coefficient from the convolution matrix
- * @param[in] mat9 Coefficient from the convolution matrix
- * @param[in] mat10 Coefficient from the convolution matrix
- * @param[in] mat11 Coefficient from the convolution matrix
- * @param[in] mat12 Coefficient from the convolution matrix
- * @param[in] mat13 Coefficient from the convolution matrix
- * @param[in] mat14 Coefficient from the convolution matrix
- * @param[in] mat15 Coefficient from the convolution matrix
- * @param[in] mat16 Coefficient from the convolution matrix
- * @param[in] mat17 Coefficient from the convolution matrix
- * @param[in] mat18 Coefficient from the convolution matrix
- * @param[in] mat19 Coefficient from the convolution matrix
- * @param[in] mat20 Coefficient from the convolution matrix
- * @param[in] mat21 Coefficient from the convolution matrix
- * @param[in] mat22 Coefficient from the convolution matrix
- * @param[in] mat23 Coefficient from the convolution matrix
- * @param[in] mat24 Coefficient from the convolution matrix
- * @param[in] mat25 Coefficient from the convolution matrix
- * @param[in] mat26 Coefficient from the convolution matrix
- * @param[in] mat27 Coefficient from the convolution matrix
- * @param[in] mat28 Coefficient from the convolution matrix
- * @param[in] mat29 Coefficient from the convolution matrix
- * @param[in] mat30 Coefficient from the convolution matrix
- * @param[in] mat31 Coefficient from the convolution matrix
- * @param[in] mat32 Coefficient from the convolution matrix
- * @param[in] mat33 Coefficient from the convolution matrix
- * @param[in] mat34 Coefficient from the convolution matrix
- * @param[in] mat35 Coefficient from the convolution matrix
- * @param[in] mat36 Coefficient from the convolution matrix
- * @param[in] mat37 Coefficient from the convolution matrix
- * @param[in] mat38 Coefficient from the convolution matrix
- * @param[in] mat39 Coefficient from the convolution matrix
- * @param[in] mat40 Coefficient from the convolution matrix
- * @param[in] mat41 Coefficient from the convolution matrix
- * @param[in] mat42 Coefficient from the convolution matrix
- * @param[in] mat43 Coefficient from the convolution matrix
- * @param[in] mat44 Coefficient from the convolution matrix
- * @param[in] mat45 Coefficient from the convolution matrix
- * @param[in] mat46 Coefficient from the convolution matrix
- * @param[in] mat47 Coefficient from the convolution matrix
- * @param[in] mat48 Coefficient from the convolution matrix
- * @param[in] scale Convolution matrix scale (Sum of the coefficients, or 1 if the sum is 0)
- *
- */
-short8 convolution7x7(
- Image *src,
- const short mat0, const short mat1, const short mat2, const short mat3, const short mat4,
- const short mat5, const short mat6, const short mat7, const short mat8, const short mat9,
- const short mat10, const short mat11, const short mat12, const short mat13, const short mat14,
- const short mat15, const short mat16, const short mat17, const short mat18, const short mat19,
- const short mat20, const short mat21, const short mat22, const short mat23, const short mat24,
- const short mat25, const short mat26, const short mat27, const short mat28, const short mat29,
- const short mat30, const short mat31, const short mat32, const short mat33, const short mat34,
- const short mat35, const short mat36, const short mat37, const short mat38, const short mat39,
- const short mat40, const short mat41, const short mat42, const short mat43, const short mat44,
- const short mat45, const short mat46, const short mat47, const short mat48, uint scale)
-{
- VEC_DATA_TYPE(DATA_TYPE, 8)
- pixels;
-
- pixels = convolution1x7(offset(src, -3, -3), mat0, mat1, mat2, mat3, mat4, mat5, mat6);
- pixels += convolution1x7(offset(src, -3, -2), mat7, mat8, mat9, mat10, mat11, mat12, mat13);
- pixels += convolution1x7(offset(src, -3, -1), mat14, mat15, mat16, mat17, mat18, mat19, mat20);
- pixels += convolution1x7(offset(src, -3, 0), mat21, mat22, mat23, mat24, mat25, mat26, mat27);
- pixels += convolution1x7(offset(src, -3, 1), mat28, mat29, mat30, mat31, mat32, mat33, mat34);
- pixels += convolution1x7(offset(src, -3, 2), mat35, mat36, mat37, mat38, mat39, mat40, mat41);
- pixels += convolution1x7(offset(src, -3, 3), mat42, mat43, mat44, mat45, mat46, mat47, mat48);
-
- if(scale > 0)
- {
- pixels /= (VEC_DATA_TYPE(DATA_TYPE, 8))scale;
- }
-
- return convert_short8_sat(pixels);
-}
-
-#ifndef DYNAMIC_MATRIX_CONVOLUTION
-
-/** Apply a 1x7 static convolution matrix to a single channel U8 input image and output a single temporary channel image.
- *
- * @attention The matrix coefficients (MAT0, MAT1, MAT2, MAT3, MAT4, MAT5, MAT6) and DATA_TYPE need to be passed at compile time:\n
- * e.g. -DMAT0=1 -DMAT1=2, ... -DMAT6=6, -DDATA_TYPE=int
- *
- * @param[in] src_ptr Pointer to the source image. Supported data types: U8
- * @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] dst_ptr Pointer to the destination image. Supported data types: U16, S16, S32
- * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void convolution_separable1x7_static(
- IMAGE_DECLARATION(src),
- IMAGE_DECLARATION(dst))
-{
- Image src = CONVERT_TO_IMAGE_STRUCT(src);
- Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
-
- // Output pixels
- VEC_DATA_TYPE(DATA_TYPE, 8)
- pixels = convolution1x7(offset(&src, -3, 0), MAT0, MAT1, MAT2, MAT3, MAT4, MAT5, MAT6);
-
- // Store result in dst
- vstore8(pixels, 0, (__global DATA_TYPE *)dst.ptr);
-}
-
-/** Apply a 7x1 static convolution matrix to a single channel U8 input image and output a single channel image.
- *
- * @attention The matrix coefficients (MAT7, MAT8, MAT9, MAT10, MAT11, MAT12, MAT13, SCALE), COMPUTE_TYPE and DATA_TYPE_OUT need to be passed at compile time:\n
- * e.g. -DMAT0=7 -DMAT1=8, ... -DMAT24=13, -DSCALE=6, -DCOMPUTE_TYPE=int, -DDATA_TYPE_OUT=int
- *
- * @param[in] src_ptr Pointer to the source image. Supported data types: U16, S16, S32
- * @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] dst_ptr Pointer to the destination image. Supported data types: U8, S16
- * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void convolution_separable7x1_static(
- IMAGE_DECLARATION(src),
- IMAGE_DECLARATION(dst))
-{
- Image src = CONVERT_TO_IMAGE_STRUCT(src);
- Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
-
- // Output pixels
- VEC_DATA_TYPE(COMPUTE_TYPE, 8)
- pixels = convolution7x1(&src, MAT7, MAT8, MAT9, MAT10, MAT11, MAT12, MAT13);
-
- // Divide by the scale
- pixels /= (VEC_DATA_TYPE(COMPUTE_TYPE, 8))SCALE;
-
- // Store result in dst
- vstore8(CONVERT_SAT(pixels, VEC_DATA_TYPE(DATA_TYPE_OUT, 8)), 0, (__global DATA_TYPE_OUT *)dst.ptr);
-}
-
-/** Apply a static 7x7 convolution matrix to a single channel U8 input image and output a single channel U8 image including the borders.
- *
- * @attention The matrix coefficients(MAT0, MAT1, ... MAT48, SCALE), DATA_TYPE_OUT need to be passed at compile time:\n
- * e.g. -DMAT0=7 -DMAT1=8, ... -DMAT48=48, -DSCALE=6, -DDATA_TYPE_OUT=int
- *
- * @param[in] src_ptr Pointer to the source image. Supported data types: U8
- * @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] dst_ptr Pointer to the destination image. Supported data types: U8, S16
- * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void convolution7x7_static(
- IMAGE_DECLARATION(src),
- IMAGE_DECLARATION(dst))
-{
- Image src = CONVERT_TO_IMAGE_STRUCT(src);
- Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
-
- short8 pixels = convolution7x7(&src,
- MAT0, MAT1, MAT2, MAT3, MAT4, MAT5, MAT6, MAT7, MAT8, MAT9, MAT10, MAT11, MAT12, MAT13,
- MAT14, MAT15, MAT16, MAT17, MAT18, MAT19, MAT20, MAT21, MAT22, MAT23, MAT24, MAT25,
- MAT26, MAT27, MAT28, MAT29, MAT30, MAT31, MAT32, MAT33, MAT34, MAT35, MAT36, MAT37,
- MAT38, MAT39, MAT40, MAT41, MAT42, MAT43, MAT44, MAT45, MAT46, MAT47, MAT48, SCALE);
-
- // Clamp results to [ 0, 255 ] and store them in dst
- vstore8(CONVERT_SAT(pixels, VEC_DATA_TYPE(DATA_TYPE_OUT, 8)), 0, (__global DATA_TYPE_OUT *)dst.ptr);
-}
-
-#endif // DYNAMIC_MATRIX_CONVOLUTION
diff --git a/src/core/CL/cl_kernels/convolution9x9.cl b/src/core/CL/cl_kernels/convolution9x9.cl
deleted file mode 100644
index 7e77c61fea..0000000000
--- a/src/core/CL/cl_kernels/convolution9x9.cl
+++ /dev/null
@@ -1,403 +0,0 @@
-/*
- * Copyright (c) 2016-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "helpers.h"
-
-#ifndef DATA_TYPE
-#define DATA_TYPE short
-#endif /* DATA_TYPE */
-
-#ifndef COMPUTE_TYPE
-#define COMPUTE_TYPE int
-#endif /* COMPUTE_TYPE */
-
-#ifndef DATA_TYPE_OUT
-#define DATA_TYPE_OUT uchar
-#endif /* DATA_TYPE_OUT */
-
-/** Compute a 1D horizontal convolution of size 9 for 8 bytes assuming the input is made of 1 channel of 1 byte (i.e 8 pixels).
- *
- * @param[in] left_pixel Pointer to the left pixel
- * @param[in] left1_coeff Weight of the most left pixel
- * @param[in] left2_coeff Weight of the second left pixel
- * @param[in] left3_coeff Weight of the third left pixel
- * @param[in] left4_coeff Weight of the left pixel
- * @param[in] middle_coeff Weight of the middle pixel
- * @param[in] right1_coeff Weight of the right pixel
- * @param[in] right2_coeff Weight of the second right pixel
- * @param[in] right3_coeff Weight of the third right pixel
- * @param[in] right4_coeff Weight of the most right pixel
- *
- * @return a short8 containing 8 convoluted values.
- */
-VEC_DATA_TYPE(DATA_TYPE, 8)
-convolution1x9(
- __global const uchar *left_pixel,
- const short left1_coeff,
- const short left2_coeff,
- const short left3_coeff,
- const short left4_coeff,
- const short middle_coeff,
- const short right1_coeff,
- const short right2_coeff,
- const short right3_coeff,
- const short right4_coeff)
-{
- uchar16 temp = vload16(0, left_pixel);
-
- VEC_DATA_TYPE(DATA_TYPE, 8)
- left1 = CONVERT(temp.s01234567, VEC_DATA_TYPE(DATA_TYPE, 8));
- VEC_DATA_TYPE(DATA_TYPE, 8)
- left2 = CONVERT(temp.s12345678, VEC_DATA_TYPE(DATA_TYPE, 8));
- VEC_DATA_TYPE(DATA_TYPE, 8)
- left3 = CONVERT(temp.s23456789, VEC_DATA_TYPE(DATA_TYPE, 8));
- VEC_DATA_TYPE(DATA_TYPE, 8)
- left4 = CONVERT(temp.s3456789a, VEC_DATA_TYPE(DATA_TYPE, 8));
- VEC_DATA_TYPE(DATA_TYPE, 8)
- middle = CONVERT(temp.s456789ab, VEC_DATA_TYPE(DATA_TYPE, 8));
- VEC_DATA_TYPE(DATA_TYPE, 8)
- right1 = CONVERT(temp.s56789abc, VEC_DATA_TYPE(DATA_TYPE, 8));
- VEC_DATA_TYPE(DATA_TYPE, 8)
- right2 = CONVERT(temp.s6789abcd, VEC_DATA_TYPE(DATA_TYPE, 8));
- VEC_DATA_TYPE(DATA_TYPE, 8)
- right3 = CONVERT(temp.s789abcde, VEC_DATA_TYPE(DATA_TYPE, 8));
- VEC_DATA_TYPE(DATA_TYPE, 8)
- right4 = CONVERT(temp.s89abcdef, VEC_DATA_TYPE(DATA_TYPE, 8));
-
- return left1 * (VEC_DATA_TYPE(DATA_TYPE, 8))left1_coeff + left2 * (VEC_DATA_TYPE(DATA_TYPE, 8))left2_coeff + left3 * (VEC_DATA_TYPE(DATA_TYPE, 8))left3_coeff + left4 * (VEC_DATA_TYPE(DATA_TYPE,
- 8))left4_coeff + middle * (VEC_DATA_TYPE(DATA_TYPE, 8))middle_coeff + right1 * (VEC_DATA_TYPE(DATA_TYPE, 8))right1_coeff + right2 * (VEC_DATA_TYPE(DATA_TYPE,
- 8))right2_coeff + right3 * (VEC_DATA_TYPE(DATA_TYPE, 8))right3_coeff + right4 * (VEC_DATA_TYPE(DATA_TYPE, 8))right4_coeff;
-}
-
-/** Compute a 1D vertical convolution of size 9 for 8 bytes assuming the input is made of 1 channel of 1 byte (i.e 8 pixels).
- *
- * @param[in] src Pointer to source image.
- * @param[in] up1_coeff Weight of the most up pixel
- * @param[in] up2_coeff Weight of the second up pixel
- * @param[in] up3_coeff Weight of the third up pixel
- * @param[in] up4_coeff Weight of the up pixel
- * @param[in] middle_coeff Weight of the middle pixel
- * @param[in] down1_coeff Weight of the down pixel
- * @param[in] down2_coeff Weight of the second down pixel
- * @param[in] down3_coeff Weight of the third down pixel
- * @param[in] down4_coeff Weight of the most down pixel
- *
- * @return a short8 containing 8 convoluted values.
- */
-VEC_DATA_TYPE(COMPUTE_TYPE, 8)
-convolution9x1(
- Image *src,
- const short up1_coeff,
- const short up2_coeff,
- const short up3_coeff,
- const short up4_coeff,
- const short middle_coeff,
- const short down1_coeff,
- const short down2_coeff,
- const short down3_coeff,
- const short down4_coeff)
-{
- VEC_DATA_TYPE(COMPUTE_TYPE, 8)
- val;
- VEC_DATA_TYPE(COMPUTE_TYPE, 8)
- out = (VEC_DATA_TYPE(COMPUTE_TYPE, 8))0;
-
- val = CONVERT(vload8(0, (__global DATA_TYPE *)offset(src, 0, -4)), VEC_DATA_TYPE(COMPUTE_TYPE, 8));
- out += val * (VEC_DATA_TYPE(COMPUTE_TYPE, 8))up1_coeff;
-
- val = CONVERT(vload8(0, (__global DATA_TYPE *)offset(src, 0, -3)), VEC_DATA_TYPE(COMPUTE_TYPE, 8));
- out += val * (VEC_DATA_TYPE(COMPUTE_TYPE, 8))up2_coeff;
-
- val = CONVERT(vload8(0, (__global DATA_TYPE *)offset(src, 0, -2)), VEC_DATA_TYPE(COMPUTE_TYPE, 8));
- out += val * (VEC_DATA_TYPE(COMPUTE_TYPE, 8))up3_coeff;
-
- val = CONVERT(vload8(0, (__global DATA_TYPE *)offset(src, 0, -1)), VEC_DATA_TYPE(COMPUTE_TYPE, 8));
- out += val * (VEC_DATA_TYPE(COMPUTE_TYPE, 8))up4_coeff;
-
- val = CONVERT(vload8(0, (__global DATA_TYPE *)offset(src, 0, 0)), VEC_DATA_TYPE(COMPUTE_TYPE, 8));
- out += val * (VEC_DATA_TYPE(COMPUTE_TYPE, 8))middle_coeff;
-
- val = CONVERT(vload8(0, (__global DATA_TYPE *)offset(src, 0, 1)), VEC_DATA_TYPE(COMPUTE_TYPE, 8));
- out += val * (VEC_DATA_TYPE(COMPUTE_TYPE, 8))down1_coeff;
-
- val = CONVERT(vload8(0, (__global DATA_TYPE *)offset(src, 0, 2)), VEC_DATA_TYPE(COMPUTE_TYPE, 8));
- out += val * (VEC_DATA_TYPE(COMPUTE_TYPE, 8))down2_coeff;
-
- val = CONVERT(vload8(0, (__global DATA_TYPE *)offset(src, 0, 3)), VEC_DATA_TYPE(COMPUTE_TYPE, 8));
- out += val * (VEC_DATA_TYPE(COMPUTE_TYPE, 8))down3_coeff;
-
- val = CONVERT(vload8(0, (__global DATA_TYPE *)offset(src, 0, 4)), VEC_DATA_TYPE(COMPUTE_TYPE, 8));
- out += val * (VEC_DATA_TYPE(COMPUTE_TYPE, 8))down4_coeff;
-
- return out;
-}
-
-/** Apply a 9x9 convolution matrix to a single channel U8 input image and return the result.
- *
- * Convolution matrix layout:\n
- * [ mat0, mat1, mat2, mat3 , mat4, mat5, mat6, mat7, mat8 ]\n
- * [ mat9, mat10, mat11, mat12, mat13, mat14, mat15, mat16, mat17 ]\n
- * [ mat18, mat19, mat20, mat21, mat22, mat23, mat24, mat25, mat26 ]\n
- * [ mat27, mat28, mat29, mat30, mat31, mat32, mat33, mat34, mat35 ]\n
- * [ mat36, mat37, mat38, mat39, mat40, mat41, mat42, mat43, mat44 ]\n
- * [ mat45, mat46, mat47, mat48, mat49, mat50, mat51, mat52, mat53 ]\n
- * [ mat54, mat55, mat56, mat57, mat58, mat59, mat60, mat61, mat62 ]
- * [ mat63, mat64, mat65, mat66, mat67, mat68, mat69, mat70, mat71 ]
- * [ mat72, mat73, mat74, mat75, mat76, mat77, mat78, mat79, mat80 ]
- *
- * @param[in] src A pointer to source Image structure.
- * @param[in] mat0 Coefficient from the convolution matrix
- * @param[in] mat1 Coefficient from the convolution matrix
- * @param[in] mat2 Coefficient from the convolution matrix
- * @param[in] mat3 Coefficient from the convolution matrix
- * @param[in] mat4 Coefficient from the convolution matrix
- * @param[in] mat5 Coefficient from the convolution matrix
- * @param[in] mat6 Coefficient from the convolution matrix
- * @param[in] mat7 Coefficient from the convolution matrix
- * @param[in] mat8 Coefficient from the convolution matrix
- * @param[in] mat9 Coefficient from the convolution matrix
- * @param[in] mat10 Coefficient from the convolution matrix
- * @param[in] mat11 Coefficient from the convolution matrix
- * @param[in] mat12 Coefficient from the convolution matrix
- * @param[in] mat13 Coefficient from the convolution matrix
- * @param[in] mat14 Coefficient from the convolution matrix
- * @param[in] mat15 Coefficient from the convolution matrix
- * @param[in] mat16 Coefficient from the convolution matrix
- * @param[in] mat17 Coefficient from the convolution matrix
- * @param[in] mat18 Coefficient from the convolution matrix
- * @param[in] mat19 Coefficient from the convolution matrix
- * @param[in] mat20 Coefficient from the convolution matrix
- * @param[in] mat21 Coefficient from the convolution matrix
- * @param[in] mat22 Coefficient from the convolution matrix
- * @param[in] mat23 Coefficient from the convolution matrix
- * @param[in] mat24 Coefficient from the convolution matrix
- * @param[in] mat25 Coefficient from the convolution matrix
- * @param[in] mat26 Coefficient from the convolution matrix
- * @param[in] mat27 Coefficient from the convolution matrix
- * @param[in] mat28 Coefficient from the convolution matrix
- * @param[in] mat29 Coefficient from the convolution matrix
- * @param[in] mat30 Coefficient from the convolution matrix
- * @param[in] mat31 Coefficient from the convolution matrix
- * @param[in] mat32 Coefficient from the convolution matrix
- * @param[in] mat33 Coefficient from the convolution matrix
- * @param[in] mat34 Coefficient from the convolution matrix
- * @param[in] mat35 Coefficient from the convolution matrix
- * @param[in] mat36 Coefficient from the convolution matrix
- * @param[in] mat37 Coefficient from the convolution matrix
- * @param[in] mat38 Coefficient from the convolution matrix
- * @param[in] mat39 Coefficient from the convolution matrix
- * @param[in] mat40 Coefficient from the convolution matrix
- * @param[in] mat41 Coefficient from the convolution matrix
- * @param[in] mat42 Coefficient from the convolution matrix
- * @param[in] mat43 Coefficient from the convolution matrix
- * @param[in] mat44 Coefficient from the convolution matrix
- * @param[in] mat45 Coefficient from the convolution matrix
- * @param[in] mat46 Coefficient from the convolution matrix
- * @param[in] mat47 Coefficient from the convolution matrix
- * @param[in] mat48 Coefficient from the convolution matrix
- * @param[in] mat49 Coefficient from the convolution matrix
- * @param[in] mat50 Coefficient from the convolution matrix
- * @param[in] mat51 Coefficient from the convolution matrix
- * @param[in] mat52 Coefficient from the convolution matrix
- * @param[in] mat53 Coefficient from the convolution matrix
- * @param[in] mat54 Coefficient from the convolution matrix
- * @param[in] mat55 Coefficient from the convolution matrix
- * @param[in] mat56 Coefficient from the convolution matrix
- * @param[in] mat57 Coefficient from the convolution matrix
- * @param[in] mat58 Coefficient from the convolution matrix
- * @param[in] mat59 Coefficient from the convolution matrix
- * @param[in] mat60 Coefficient from the convolution matrix
- * @param[in] mat61 Coefficient from the convolution matrix
- * @param[in] mat62 Coefficient from the convolution matrix
- * @param[in] mat63 Coefficient from the convolution matrix
- * @param[in] mat64 Coefficient from the convolution matrix
- * @param[in] mat65 Coefficient from the convolution matrix
- * @param[in] mat66 Coefficient from the convolution matrix
- * @param[in] mat67 Coefficient from the convolution matrix
- * @param[in] mat68 Coefficient from the convolution matrix
- * @param[in] mat69 Coefficient from the convolution matrix
- * @param[in] mat70 Coefficient from the convolution matrix
- * @param[in] mat71 Coefficient from the convolution matrix
- * @param[in] mat72 Coefficient from the convolution matrix
- * @param[in] mat73 Coefficient from the convolution matrix
- * @param[in] mat74 Coefficient from the convolution matrix
- * @param[in] mat75 Coefficient from the convolution matrix
- * @param[in] mat76 Coefficient from the convolution matrix
- * @param[in] mat77 Coefficient from the convolution matrix
- * @param[in] mat78 Coefficient from the convolution matrix
- * @param[in] mat79 Coefficient from the convolution matrix
- * @param[in] mat80 Coefficient from the convolution matrix
- * @param[in] scale Convolution matrix scale (Sum of the coefficients, or 1 if the sum is 0)
- *
- */
-short8 convolution9x9(
- Image *src,
- const short mat0, const short mat1, const short mat2, const short mat3, const short mat4,
- const short mat5, const short mat6, const short mat7, const short mat8, const short mat9,
- const short mat10, const short mat11, const short mat12, const short mat13, const short mat14,
- const short mat15, const short mat16, const short mat17, const short mat18, const short mat19,
- const short mat20, const short mat21, const short mat22, const short mat23, const short mat24,
- const short mat25, const short mat26, const short mat27, const short mat28, const short mat29,
- const short mat30, const short mat31, const short mat32, const short mat33, const short mat34,
- const short mat35, const short mat36, const short mat37, const short mat38, const short mat39,
- const short mat40, const short mat41, const short mat42, const short mat43, const short mat44,
- const short mat45, const short mat46, const short mat47, const short mat48, const short mat49,
- const short mat50, const short mat51, const short mat52, const short mat53, const short mat54,
- const short mat55, const short mat56, const short mat57, const short mat58, const short mat59,
- const short mat60, const short mat61, const short mat62, const short mat63, const short mat64,
- const short mat65, const short mat66, const short mat67, const short mat68, const short mat69,
- const short mat70, const short mat71, const short mat72, const short mat73, const short mat74,
- const short mat75, const short mat76, const short mat77, const short mat78, const short mat79,
- const short mat80, uint scale)
-{
- VEC_DATA_TYPE(DATA_TYPE, 8)
- pixels;
-
- pixels = convolution1x9(offset(src, -4, -4), mat0, mat1, mat2, mat3, mat4, mat5, mat6, mat7, mat8);
- pixels += convolution1x9(offset(src, -4, -3), mat9, mat10, mat11, mat12, mat13, mat14, mat15, mat16, mat17);
- pixels += convolution1x9(offset(src, -4, -2), mat18, mat19, mat20, mat21, mat22, mat23, mat24, mat25, mat26);
- pixels += convolution1x9(offset(src, -4, -1), mat27, mat28, mat29, mat30, mat31, mat32, mat33, mat34, mat35);
- pixels += convolution1x9(offset(src, -4, 0), mat36, mat37, mat38, mat39, mat40, mat41, mat42, mat43, mat44);
- pixels += convolution1x9(offset(src, -4, 1), mat45, mat46, mat47, mat48, mat49, mat50, mat51, mat52, mat53);
- pixels += convolution1x9(offset(src, -4, 2), mat54, mat55, mat56, mat57, mat58, mat59, mat60, mat61, mat62);
- pixels += convolution1x9(offset(src, -4, 3), mat63, mat64, mat65, mat66, mat67, mat68, mat69, mat70, mat71);
- pixels += convolution1x9(offset(src, -4, 4), mat72, mat73, mat74, mat75, mat76, mat77, mat78, mat79, mat80);
-
- if(scale > 0)
- {
- pixels /= (VEC_DATA_TYPE(DATA_TYPE, 8))scale;
- }
-
- return convert_short8_sat(pixels);
-}
-
-#ifndef DYNAMIC_MATRIX_CONVOLUTION
-
-/** Apply a 1x9 static convolution matrix to a single channel U8 input image and output a single temporary channel image.
- *
- * @attention The matrix coefficients (MAT0, MAT1, MAT2, MAT3, MAT4, MAT5, MAT6, MAT7, MAT8) and DATA_TYPE need to be passed at compile time:\n
- * e.g. -DMAT0=7 -DMAT1=8, ... -DMAT8=8, -DCOMPUTE_TYPE=int
- *
- * @param[in] src_ptr Pointer to the source image. Supported data types: U8
- * @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] dst_ptr Pointer to the destination image. Supported data types: U16, S16, S32
- * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void convolution_separable1x9_static(
- IMAGE_DECLARATION(src),
- IMAGE_DECLARATION(dst))
-{
- Image src = CONVERT_TO_IMAGE_STRUCT(src);
- Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
-
- // Output pixels
- VEC_DATA_TYPE(DATA_TYPE, 8)
- pixels = convolution1x9(offset(&src, -4, 0), MAT0, MAT1, MAT2, MAT3, MAT4, MAT5, MAT6, MAT7, MAT8);
-
- // Store result in dst
- vstore8(pixels, 0, (__global DATA_TYPE *)dst.ptr);
-}
-
-/** Apply a 9x1 static convolution matrix to a single channel U8 input image and output a single channel image.
- *
- * @attention The matrix coefficients (MAT9, MAT10, ... MAT17, SCALE), COMPUTE_TYPE and DATA_TYPE_OUT need to be passed at compile time:\n
- * e.g. -DMAT9=9 -DMAT10=10, ... -DMAT17=17, -DSCALE=6, -DCOMPUTE_TYPE=int, -DDATA_TYPE_OUT=int
- *
- * @param[in] src_ptr Pointer to the source image. Supported data types: U16, S16, S32
- * @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] dst_ptr Pointer to the destination image. Supported data types: U8, S16
- * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void convolution_separable9x1_static(
- IMAGE_DECLARATION(src),
- IMAGE_DECLARATION(dst))
-{
- Image src = CONVERT_TO_IMAGE_STRUCT(src);
- Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
-
- // Output pixels
- VEC_DATA_TYPE(COMPUTE_TYPE, 8)
- pixels = convolution9x1(&src, MAT9, MAT10, MAT11, MAT12, MAT13, MAT14, MAT15, MAT16, MAT17);
-
- // Divide by the scale
- pixels = pixels / (VEC_DATA_TYPE(COMPUTE_TYPE, 8))SCALE;
-
- // Store result in dst
- vstore8(CONVERT_SAT(pixels, VEC_DATA_TYPE(DATA_TYPE_OUT, 8)), 0, (__global DATA_TYPE_OUT *)dst.ptr);
-}
-
-/** Apply a static 9x9 convolution matrix to a single channel U8 input image and output a single channel image including borders
- *
- * @attention The matrix coefficients(MAT0, MAT1, ... MAT80, SCALE), DATA_TYPE_OUT need to be passed at compile time:\n
- * e.g. -DMAT0=0 -DMAT1=1, ... -DMAT80=80, -DSCALE=6, -DDATA_TYPE_OUT=int
- *
- * @param[in] src_ptr Pointer to the source image. Supported data types: U8
- * @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] dst_ptr Pointer to the destination image. Supported data types: U8, S16
- * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void convolution9x9_static(
- IMAGE_DECLARATION(src),
- IMAGE_DECLARATION(dst))
-{
- Image src = CONVERT_TO_IMAGE_STRUCT(src);
- Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
-
- short8 pixels = convolution9x9(&src,
- MAT0, MAT1, MAT2, MAT3, MAT4, MAT5, MAT6, MAT7, MAT8, MAT9, MAT10, MAT11, MAT12, MAT13,
- MAT14, MAT15, MAT16, MAT17, MAT18, MAT19, MAT20, MAT21, MAT22, MAT23, MAT24, MAT25,
- MAT26, MAT27, MAT28, MAT29, MAT30, MAT31, MAT32, MAT33, MAT34, MAT35, MAT36, MAT37,
- MAT38, MAT39, MAT40, MAT41, MAT42, MAT43, MAT44, MAT45, MAT46, MAT47, MAT48, MAT49,
- MAT50, MAT51, MAT52, MAT53, MAT54, MAT55, MAT56, MAT57, MAT58, MAT59, MAT60, MAT61,
- MAT62, MAT63, MAT64, MAT65, MAT66, MAT67, MAT68, MAT69, MAT70, MAT71, MAT72, MAT73,
- MAT74, MAT75, MAT76, MAT77, MAT78, MAT79, MAT80, SCALE);
-
- // Store the result as is in dst
- vstore8(CONVERT_SAT(pixels, VEC_DATA_TYPE(DATA_TYPE_OUT, 8)), 0, (__global DATA_TYPE_OUT *)dst.ptr);
-}
-
-#endif // DYNAMIC_MATRIX_CONVOLUTION
diff --git a/src/core/CL/cl_kernels/convolution_rectangle.cl b/src/core/CL/cl_kernels/convolution_rectangle.cl
deleted file mode 100644
index 925a698628..0000000000
--- a/src/core/CL/cl_kernels/convolution_rectangle.cl
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Copyright (c) 2016, 2017 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "convolution3x3.cl"
-#include "convolution5x5.cl"
-#include "convolution7x7.cl"
-#include "convolution9x9.cl"
-#include "helpers.h"
-
-#define MAT_INDEX(i) MAT##i
-
-#ifndef DATA_TYPE
-#define DATA_TYPE short
-#endif /* DATA_TYPE */
-
-#ifndef COMPUTE_TYPE
-#define COMPUTE_TYPE int
-#endif /* COMPUTE_TYPE */
-
-#ifndef DATA_TYPE_OUT
-#define DATA_TYPE_OUT uchar
-#endif /* DATA_TYPE_OUT */
-
-#ifndef DYNAMIC_MATRIX_CONVOLUTION
-
-/** Apply a rectangle matrix to a single channel U8 input image and output a single channel image including borders
- *
- * @attention The matrix coefficients(MAT0, MAT1, ... MAT80, SCALE), MATRIX_WIDTH, MATRIX_HEIGHT, COMPUTE_TYPE, DATA_TYPE, DATA_TYPE_OUT need to be passed at compile time:\n
- * e.g. -DMAT0=0 -DMAT1=1, ... -DMAT80=80, -DSCALE=6, -DMATRIX_WIDTH=3, -DMATRIX_HEIGHT=5, -DCOMPUTE_TYPE=int, -DDATA_TYPE=int, -DDATA_TYPE_OUT=int
- *
- * @param[in] src_ptr Pointer to the source image. Supported data types: U8
- * @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] dst_ptr Pointer to the destination image. Supported data types: U8, S16
- * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void convolution_rectangle(
- IMAGE_DECLARATION(src),
- IMAGE_DECLARATION(dst))
-{
- Image src = CONVERT_TO_IMAGE_STRUCT(src);
- Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
-
- short matrix_coeff[81] =
- {
- MAT0, MAT1, MAT2, MAT3, MAT4, MAT5, MAT6, MAT7, MAT8,
- MAT9, MAT10, MAT11, MAT12, MAT13, MAT14, MAT15, MAT16, MAT17,
- MAT18, MAT19, MAT20, MAT21, MAT22, MAT23, MAT24, MAT25, MAT26,
- MAT27, MAT28, MAT29, MAT30, MAT31, MAT32, MAT33, MAT34, MAT35,
- MAT36, MAT37, MAT38, MAT39, MAT40, MAT41, MAT42, MAT43, MAT44,
- MAT45, MAT46, MAT47, MAT48, MAT49, MAT50, MAT51, MAT52, MAT53,
- MAT54, MAT55, MAT56, MAT57, MAT58, MAT59, MAT60, MAT61, MAT62,
- MAT63, MAT64, MAT65, MAT66, MAT67, MAT68, MAT69, MAT70, MAT71,
- MAT72, MAT73, MAT74, MAT75, MAT76, MAT77, MAT78, MAT79, MAT80
- };
-
- VEC_DATA_TYPE(DATA_TYPE, 8)
- pixels = (VEC_DATA_TYPE(DATA_TYPE, 8))0;
-
- for(int i = 0; i < MATRIX_HEIGHT; i++)
- {
-#if MATRIX_WIDTH == 3
- pixels += convolution1x3(offset(&src, -1, -(MATRIX_HEIGHT / 2) + i), matrix_coeff[0 + i * 3], matrix_coeff[1 + i * 3],
- matrix_coeff[2 + i * 3]);
-#endif /* MATRIX_WIDTH */
-
-#if MATRIX_WIDTH == 5
- pixels += convolution1x5(offset(&src, -2, -(MATRIX_HEIGHT / 2) + i), matrix_coeff[0 + i * 5], matrix_coeff[1 + i * 5],
- matrix_coeff[2 + i * 5], matrix_coeff[3 + i * 5], matrix_coeff[4 + i * 5]);
-#endif /* MATRIX_WIDTH */
-
-#if MATRIX_WIDTH == 7
- pixels += convolution1x7(offset(&src, -3, -(MATRIX_HEIGHT / 2) + i), matrix_coeff[0 + i * 7], matrix_coeff[1 + i * 7],
- matrix_coeff[2 + i * 7], matrix_coeff[3 + i * 7], matrix_coeff[4 + i * 7],
- matrix_coeff[5 + i * 7], matrix_coeff[6 + i * 7]);
-#endif /* MATRIX_WIDTH */
-
-#if MATRIX_WIDTH == 9
- pixels += convolution1x9(offset(&src, -4, -(MATRIX_HEIGHT / 2) + i), matrix_coeff[0 + i * 9], matrix_coeff[1 + i * 9],
- matrix_coeff[2 + i * 9], matrix_coeff[3 + i * 9], matrix_coeff[4 + i * 9],
- matrix_coeff[5 + i * 9], matrix_coeff[6 + i * 9], matrix_coeff[7 + i * 9], matrix_coeff[8 + i * 9]);
-#endif /* MATRIX_WIDTH */
- }
-
- pixels /= (VEC_DATA_TYPE(DATA_TYPE, 8))SCALE;
-
- // Store the result as is in dst
- vstore8(CONVERT_SAT(pixels, VEC_DATA_TYPE(DATA_TYPE_OUT, 8)), 0, ((__global DATA_TYPE_OUT *)dst.ptr));
-}
-
-#endif /* not DYNAMIC_MATRIX_CONVOLUTION */
diff --git a/src/core/CL/cl_kernels/derivative.cl b/src/core/CL/cl_kernels/derivative.cl
deleted file mode 100644
index dddbb4d615..0000000000
--- a/src/core/CL/cl_kernels/derivative.cl
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Copyright (c) 2016, 2017 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "helpers.h"
-
-/** This OpenCL kernel computes the first-order derivative.
- *
- * @attention To enable computation of the X gradient -DGRAD_X must be passed at compile time, while computation of the Y gradient
- * is performed when -DGRAD_Y is used. You can use both when computation of both gradients is required.
- *
- * @param[in] src_ptr Pointer to the source image. Supported data types: U8
- * @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] dst_gx_ptr Pointer to the X gradient destination image (only declared when -DGRAD_X is set). Supported data types: S16
- * @param[in] dst_gx_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] dst_gx_step_x dst_gx_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_gx_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] dst_gx_step_y dst_gx_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_gx_offset_first_element_in_bytes The offset of the first element in the destination image
- * @param[out] dst_gy_ptr Pointer to the Y gradient destination image (only declared when -DGRAD_Y is set). Supported data types: S16
- * @param[in] dst_gy_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] dst_gy_step_x dst_gy_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_gy_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] dst_gy_step_y dst_gy_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_gy_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void derivative(
- IMAGE_DECLARATION(src)
-#ifdef GRAD_X
- ,
- IMAGE_DECLARATION(dst_gx)
-#endif /* GRAD_X */
-#ifdef GRAD_Y
- ,
- IMAGE_DECLARATION(dst_gy)
-#endif /* GRAD_Y */
-)
-{
- Image src = CONVERT_TO_IMAGE_STRUCT(src);
-#ifdef GRAD_X
- Image dst_gx = CONVERT_TO_IMAGE_STRUCT(dst_gx);
-#endif /* GRAD_X */
-#ifdef GRAD_Y
- Image dst_gy = CONVERT_TO_IMAGE_STRUCT(dst_gy);
-#endif /* GRAD_Y */
-
-#ifdef GRAD_X
- short16 l_data = convert_short16(vload16(0, offset(&src, -1, 0))); // 16 values read at offset (-1, 0), widened to short
- short16 r_data = convert_short16(vload16(0, offset(&src, 1, 0))); // 16 values read at offset (1, 0), widened to short
- vstore16(r_data - l_data, 0, ((__global short *)dst_gx.ptr)); // X gradient = right - left, 16 results per work-item
-#endif /* GRAD_X */
-#ifdef GRAD_Y
- short16 t_data = convert_short16(vload16(0, offset(&src, 0, -1))); // 16 values read at offset (0, -1), widened to short
- short16 b_data = convert_short16(vload16(0, offset(&src, 0, 1))); // 16 values read at offset (0, 1), widened to short
- vstore16(b_data - t_data, 0, ((__global short *)dst_gy.ptr)); // Y gradient = bottom - top, 16 results per work-item
-#endif /* GRAD_Y */
-}
diff --git a/src/core/CL/cl_kernels/dilate.cl b/src/core/CL/cl_kernels/dilate.cl
deleted file mode 100644
index 14362c1f31..0000000000
--- a/src/core/CL/cl_kernels/dilate.cl
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2016, 2017 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "helpers.h"
-
-/** This function dilates an input image: each of the 8 values written per work-item is the maximum over 3 loaded rows and 3 adjacent lanes.
- *
- * @param[in] src_ptr Pointer to the source image. Supported data types: U8
- * @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] dst_ptr Pointer to the destination image. Supported data types: U8
- * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void dilate(
- IMAGE_DECLARATION(src),
- IMAGE_DECLARATION(dst))
-{
- Image src = CONVERT_TO_IMAGE_STRUCT(src);
- Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
-
- uchar16 top = vload16(0, offset(&src, -1, -1)); // 16 values read at offset (-1, -1)
- uchar16 middle = vload16(0, offset(&src, -1, 0)); // 16 values read at offset (-1, 0)
- uchar16 bottom = vload16(0, offset(&src, -1, 1)); // 16 values read at offset (-1, 1)
-
- uchar16 tmp = max(top, max(middle, bottom)); // lane-wise maximum across the three loaded rows
- uchar8 out = max(tmp.s01234567, max(tmp.s12345678, tmp.s23456789)); // maximum of lanes i, i+1, i+2 for each of the 8 outputs
-
- vstore8(out, 0, dst.ptr);
-}
diff --git a/src/core/CL/cl_kernels/erode.cl b/src/core/CL/cl_kernels/erode.cl
deleted file mode 100644
index 810c5fc51a..0000000000
--- a/src/core/CL/cl_kernels/erode.cl
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2016, 2017 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "helpers.h"
-
-/** This function erodes an input image: each of the 8 values written per work-item is the minimum over 3 loaded rows and 3 adjacent lanes.
- *
- * @param[in] src_ptr Pointer to the source image. Supported data types: U8
- * @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] dst_ptr Pointer to the destination image. Supported data types: U8
- * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void erode(
- IMAGE_DECLARATION(src),
- IMAGE_DECLARATION(dst))
-{
- Image src = CONVERT_TO_IMAGE_STRUCT(src);
- Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
-
- uchar16 top = vload16(0, offset(&src, -1, -1)); // 16 values read at offset (-1, -1)
- uchar16 middle = vload16(0, offset(&src, -1, 0)); // 16 values read at offset (-1, 0)
- uchar16 bottom = vload16(0, offset(&src, -1, 1)); // 16 values read at offset (-1, 1)
-
- uchar16 tmp = min(top, min(middle, bottom)); // lane-wise minimum across the three loaded rows
- uchar8 out = min(tmp.s01234567, min(tmp.s12345678, tmp.s23456789)); // minimum of lanes i, i+1, i+2 for each of the 8 outputs
-
- vstore8(out, 0, dst.ptr);
-}
diff --git a/src/core/CL/cl_kernels/fast_corners.cl b/src/core/CL/cl_kernels/fast_corners.cl
deleted file mode 100644
index 89c144ab5e..0000000000
--- a/src/core/CL/cl_kernels/fast_corners.cl
+++ /dev/null
@@ -1,262 +0,0 @@
-/*
- * Copyright (c) 2016-2018 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "helpers.h"
-#include "types.h"
-
-/* The map table to retrieve the 16 texels in the Bresenham circle of radius 3 with center in P. Each entry is an { x, y } offset relative to P.
- *
- * . . F 0 1 . . .
- * . E . . . 2 . .
- * D . . . . . 3 .
- * C . . P . . 4 .
- * B . . . . . 5 .
- * . A . . . 6 . .
- * . . 9 8 7 . . .
- */
-constant int offsets_s[16][2] =
-{
- { 0, -3 }, // 0
- { 1, -3 }, // 1
- { 2, -2 }, // 2
- { 3, -1 }, // 3
- { 3, 0 }, // 4
- { 3, 1 }, // 5
- { 2, 2 }, // 6
- { 1, 3 }, // 7
- { 0, 3 }, // 8
- { -1, 3 }, // 9
- { -2, 2 }, // A
- { -3, 1 }, // B
- { -3, 0 }, // C
- { -3, -1 }, // D
- { -2, -2 }, // E
- { -1, -3 }, // F
-};
-
-/** Load the pixel at circle position a around ptr and record in bit a of the masks whether it is dark or bright.
- *
- * @param[in] ptr The pointer to the centre (candidate) pixel in the source image; circle offsets are applied relative to it
- * @param[in] a Index to indicate the position in the Bresenham circle; also the bit position updated in both masks
- * @param[in] stride Stride of source image in Y dimension (it multiplies the y component of the circle offset)
- * @param[in] dark The left end of the threshold range; a pixel strictly below it is dark
- * @param[in] bright The right end of the threshold range; a pixel strictly above it is bright
- * @param[in,out] dark_mask The bit-set mask records dark pixels. Bit a is OR-ed with 1 if the corresponding pixel is dark
- * @param[in,out] bright_mask The bit-set mask records bright pixels. Bit a is OR-ed with 1 if the corresponding pixel is bright
- *
- */
-#define LOAD_AND_SET_MASK(ptr, a, stride, dark, bright, dark_mask, bright_mask) \
- { \
- unsigned char pixel; \
- pixel = *(ptr + (int)stride * offsets_s[a][1] + offsets_s[a][0]); \
- dark_mask |= (pixel < dark) << a; \
- bright_mask |= (pixel > bright) << a; \
- }
-
-/** Checks if a pixel is a corner. Pixel is considered as a corner if 9 contiguous pixels in the Bresenham circle are all bright or all dark.
- *
- * @param[in,out] bright_mask The mask recording positions of bright pixels; it is shifted right by one per iteration, so its value is consumed
- * @param[in,out] dark_mask The mask recording positions of dark pixels; it is shifted right by one per iteration, so its value is consumed
- * @param[in,out] isCorner Indicate whether candidate pixel is corner; only OR-ed with the result, so it must be initialised by the caller
- */
-#define CHECK_CORNER(bright_mask, dark_mask, isCorner) \
- { \
- for(int i = 0; i < 16; i++) \
- { \
- isCorner |= ((bright_mask & 0x1FF) == 0x1FF); \
- isCorner |= ((dark_mask & 0x1FF) == 0x1FF); \
- if(isCorner) \
- { \
- break; \
- } \
- bright_mask >>= 1; \
- dark_mask >>= 1; \
- } \
- }
-
-/* Calculate pixel's strength: bisects between threshold and 255 and returns the largest tested value at which the corner test still passes */
-uchar compute_strength(uchar candidate_pixel, __global unsigned char *ptr, unsigned int stride, unsigned char threshold)
-{
- short a = threshold; // lower bound of the search interval
- short b = 255; // upper bound of the search interval
- while(b - a > 1) // stop once the interval cannot be split any further
- {
- uchar c = convert_uchar_sat((a + b) / 2); // trial threshold: midpoint of [a, b]
- unsigned int bright_mask = 0;
- unsigned int dark_mask = 0;
-
- unsigned char p_bright = add_sat(candidate_pixel, c);
- unsigned char p_dark = sub_sat(candidate_pixel, c);
-
- bool isCorner = 0;
-
- for(uint i = 0; i < 16; i++)
- {
- LOAD_AND_SET_MASK(ptr, i, stride, p_dark, p_bright, dark_mask, bright_mask)
- }
-
- bright_mask |= (bright_mask << 16); // duplicate the 16 bits so runs wrapping past position 15 become contiguous
- dark_mask |= (dark_mask << 16);
- CHECK_CORNER(bright_mask, dark_mask, isCorner);
-
- if(isCorner)
- {
- a = convert_short(c); // still a corner at c: raise the lower bound
- }
- else
- {
- b = convert_short(c); // not a corner at c: lower the upper bound
- }
- }
- return a;
-}
-
-/** Fast corners implementation. Writes 0 for a non-corner pixel; for a corner writes its strength (with -DUSE_MAXSUPPRESSION) or 1 (without).
- *
- * The algorithm loops through the 16 pixels in the Bresenham circle and sets the low 16 bits of the masks if the corresponding pixel is bright
- * or dark. It then copies the low 16 bits to the high 16 bits of the masks and right-shifts them to check whether the 9 contiguous bits
- * from the LSB are set.
- *
- * @param[in] input_ptr Pointer to the source image. Supported data types: U8
- * @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] output_ptr Pointer to the destination image. Supported data types: U8
- * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image
- * @param[in] threshold_value Threshold value. It is cast to uchar inside the kernel.
- *
- */
-__kernel void fast_corners(
- IMAGE_DECLARATION(input),
- IMAGE_DECLARATION(output),
- float threshold_value)
-{
- Image in = CONVERT_TO_IMAGE_STRUCT(input);
- Image out = CONVERT_TO_IMAGE_STRUCT(output);
-
- const unsigned char threshold = (uchar)threshold_value;
-
- unsigned int bright_mask = 0;
- unsigned int dark_mask = 0;
-
- unsigned char isCorner = 0;
-
- unsigned char p = *in.ptr; // candidate (centre) pixel
- unsigned char p_bright = add_sat(p, threshold); // saturating, so the bounds stay within 0..255
- unsigned char p_dark = sub_sat(p, threshold);
-
- LOAD_AND_SET_MASK(in.ptr, 0, input_stride_y, p_dark, p_bright, dark_mask, bright_mask)
- LOAD_AND_SET_MASK(in.ptr, 4, input_stride_y, p_dark, p_bright, dark_mask, bright_mask)
- LOAD_AND_SET_MASK(in.ptr, 8, input_stride_y, p_dark, p_bright, dark_mask, bright_mask)
- LOAD_AND_SET_MASK(in.ptr, 12, input_stride_y, p_dark, p_bright, dark_mask, bright_mask)
-
- if(((bright_mask | dark_mask) & 0x1111) == 0) // early reject: none of positions 0, 4, 8, 12 is bright or dark
- {
- *out.ptr = 0;
- return;
- }
-
- LOAD_AND_SET_MASK(in.ptr, 1, input_stride_y, p_dark, p_bright, dark_mask, bright_mask)
- LOAD_AND_SET_MASK(in.ptr, 2, input_stride_y, p_dark, p_bright, dark_mask, bright_mask)
- LOAD_AND_SET_MASK(in.ptr, 3, input_stride_y, p_dark, p_bright, dark_mask, bright_mask)
- LOAD_AND_SET_MASK(in.ptr, 5, input_stride_y, p_dark, p_bright, dark_mask, bright_mask)
- LOAD_AND_SET_MASK(in.ptr, 6, input_stride_y, p_dark, p_bright, dark_mask, bright_mask)
- LOAD_AND_SET_MASK(in.ptr, 7, input_stride_y, p_dark, p_bright, dark_mask, bright_mask)
- LOAD_AND_SET_MASK(in.ptr, 9, input_stride_y, p_dark, p_bright, dark_mask, bright_mask)
- LOAD_AND_SET_MASK(in.ptr, 10, input_stride_y, p_dark, p_bright, dark_mask, bright_mask)
- LOAD_AND_SET_MASK(in.ptr, 11, input_stride_y, p_dark, p_bright, dark_mask, bright_mask)
- LOAD_AND_SET_MASK(in.ptr, 13, input_stride_y, p_dark, p_bright, dark_mask, bright_mask)
- LOAD_AND_SET_MASK(in.ptr, 14, input_stride_y, p_dark, p_bright, dark_mask, bright_mask)
- LOAD_AND_SET_MASK(in.ptr, 15, input_stride_y, p_dark, p_bright, dark_mask, bright_mask)
-
- bright_mask |= (bright_mask << 16); // duplicate the 16 bits so runs wrapping past position 15 become contiguous
- dark_mask |= (dark_mask << 16);
-
- CHECK_CORNER(bright_mask, dark_mask, isCorner)
-
- if(!isCorner)
- {
- *out.ptr = 0;
- return;
- }
-
-#ifdef USE_MAXSUPPRESSION
- *out.ptr = compute_strength(p, in.ptr, input_stride_y, threshold);
-#else /* USE_MAXSUPPRESSION */
- *out.ptr = 1;
-#endif /* USE_MAXSUPPRESSION */
-}
-
-/** Copy result to Keypoint buffer and count number of corners
- *
- * @param[in] input_ptr Pointer to the image with calculated strengths. Supported data types: U8
- * @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[in] max_num_points The maximum number of keypoints the array can hold
- * @param[in] offset The number of skipped pixels; it is added to both the x and y global IDs to form the keypoint coordinates
- * @param[in,out] num_of_points Number of points found; atomically incremented for every non-zero input value
- * @param[out] out The keypoints found
- *
- */
-__kernel void copy_to_keypoint(
- IMAGE_DECLARATION(input),
- uint max_num_points,
- uint offset,
- __global uint *num_of_points,
- __global Keypoint *out)
-{
-#ifndef UPDATE_NUMBER
- if(*num_of_points >= max_num_points) // without -DUPDATE_NUMBER, stop early once the counter has reached the capacity
- {
- return;
- }
-#endif /* UPDATE_NUMBER */
-
- Image in = CONVERT_TO_IMAGE_STRUCT(input);
-
- uchar value = *in.ptr;
-
- if(value > 0)
- {
- int id = atomic_inc(num_of_points); // atomic_inc returns the value before the increment, used here as the slot index
- if(id < max_num_points) // the counter may grow past the capacity, but nothing is written beyond it
- {
- out[id].strength = value;
- out[id].x = get_global_id(0) + offset;
- out[id].y = get_global_id(1) + offset;
- out[id].tracking_status = 1;
- out[id].scale = 0.f;
- out[id].orientation = 0.f;
- out[id].error = 0.f;
- }
- }
-}
diff --git a/src/core/CL/cl_kernels/gaussian_pyramid.cl b/src/core/CL/cl_kernels/gaussian_pyramid.cl
deleted file mode 100644
index ae2c31a848..0000000000
--- a/src/core/CL/cl_kernels/gaussian_pyramid.cl
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * Copyright (c) 2017 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "helpers.h"
-
-/** Computes the Gaussian Filter 1x5 + sub-sampling along the X direction
- *
- * @note Each thread computes 8 pixels from 20 input bytes, one output per 2 input columns; the weighted sum is stored without normalisation
- *
- * @param[in] src_ptr Pointer to the source image. Supported data types: U8
- * @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] dst_ptr Pointer to the destination image. Supported data types: U16
- * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void gaussian1x5_sub_x(
- IMAGE_DECLARATION(src),
- IMAGE_DECLARATION(dst))
-{
- Image src = CONVERT_TO_IMAGE_STRUCT(src);
- Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
-
- // Load values for the convolution (20 bytes needed)
- uchar16 temp0 = vload16(0, src.ptr);
- uchar4 temp1 = vload4(0, src.ptr + 16);
-
- // Convert to USHORT8: tap k of output i reads input byte 2 * i + k (bytes 16..19 come from temp1)
- ushort8 l2_data = convert_ushort8((uchar8)(temp0.s02468ACE));
- ushort8 l1_data = convert_ushort8((uchar8)(temp0.s13579BDF));
- ushort8 m_data = convert_ushort8((uchar8)(temp0.s2468, temp0.sACE, temp1.s0));
- ushort8 r1_data = convert_ushort8((uchar8)(temp0.s3579, temp0.sBDF, temp1.s1));
- ushort8 r2_data = convert_ushort8((uchar8)(temp0.s468A, temp0.sCE, temp1.s02));
-
- // Compute convolution along the X direction with weights 1, 4, 6, 4, 1
- ushort8 pixels = l2_data + r2_data;
- pixels += l1_data * (ushort8)4;
- pixels += m_data * (ushort8)6;
- pixels += r1_data * (ushort8)4;
-
- // Store result
- vstore8(pixels, 0, (__global ushort *)dst.ptr);
-}
-
-/** Computes the Gaussian Filter 5x1 + sub-sampling along the Y direction
- *
- * @note Each thread computes 8 pixels
- *
- * @param[in] src_ptr Pointer to the source image. Supported data types: U16
- * @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] dst_ptr Pointer to the destination image. Supported data types: U8
- * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void gaussian5x1_sub_y(
- IMAGE_DECLARATION(src),
- IMAGE_DECLARATION(dst))
-{
- Image src = CONVERT_TO_IMAGE_STRUCT(src);
- Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
-
- // Load values: 8 ushort elements from each of 5 positions, second offset() argument 0..4
- ushort8 u2_data = vload8(0, (__global ushort *)offset(&src, 0, 0));
- ushort8 u1_data = vload8(0, (__global ushort *)offset(&src, 0, 1));
- ushort8 m_data = vload8(0, (__global ushort *)offset(&src, 0, 2));
- ushort8 d1_data = vload8(0, (__global ushort *)offset(&src, 0, 3));
- ushort8 d2_data = vload8(0, (__global ushort *)offset(&src, 0, 4));
-
- // Compute convolution along the Y direction with weights 1, 4, 6, 4, 1
- ushort8 pixels = u2_data + d2_data;
- pixels += u1_data * (ushort8)4;
- pixels += m_data * (ushort8)6;
- pixels += d1_data * (ushort8)4;
-
- // Scale result: right shift by 8, i.e. divide by 256
- pixels >>= (ushort8)8;
-
- // Store result, saturating each value to the uchar range
- vstore8(convert_uchar8_sat(pixels), 0, dst.ptr);
-}
diff --git a/src/core/CL/cl_kernels/harris_corners.cl b/src/core/CL/cl_kernels/harris_corners.cl
deleted file mode 100644
index 3e3c9fd23c..0000000000
--- a/src/core/CL/cl_kernels/harris_corners.cl
+++ /dev/null
@@ -1,376 +0,0 @@
-/*
- * Copyright (c) 2016, 2017 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "helpers.h"
-
-/** Function running harris score on 3x3 block size
- *
- * @attention: The input data type should be passed using a compile option -DDATA_TYPE. Supported types: short and int.
- * e.g. -DDATA_TYPE=short.
- *
- * @param[in] src_gx_ptr Pointer to the first source image. Supported data types: S16, S32
- * @param[in] src_gx_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] src_gx_step_x src_gx_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_gx_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] src_gx_step_y src_gx_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_gx_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[in] src_gy_ptr Pointer to the second source image. Supported data types: S16, S32
- * @param[in] src_gy_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] src_gy_step_x src_gy_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_gy_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] src_gy_step_y src_gy_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_gy_offset_first_element_in_bytes The offset of the first element in the destination image
- * @param[out] vc_ptr Pointer to the destination image. Supported data types: F32
- * @param[in] vc_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] vc_step_x vc_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] vc_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] vc_step_y vc_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] vc_offset_first_element_in_bytes The offset of the first element in the destination image
- * @param[in] sensitivity Sensitivity threshold k from the Harris-Stephens equation
- * @param[in] strength_thresh Minimum threshold with which to eliminate Harris Corner scores
- * @param[in] pow4_normalization_factor Normalization factor to apply harris score
- */
-__kernel void harris_score_3x3(
- IMAGE_DECLARATION(src_gx),
- IMAGE_DECLARATION(src_gy),
- IMAGE_DECLARATION(vc),
- float sensitivity,
- float strength_thresh,
- float pow4_normalization_factor)
-{
- Image src_gx = CONVERT_TO_IMAGE_STRUCT(src_gx);
- Image src_gy = CONVERT_TO_IMAGE_STRUCT(src_gy);
- Image vc = CONVERT_TO_IMAGE_STRUCT(vc);
-
- /* Gx^2, Gy^2 and Gx*Gy */
- float4 gx2 = (float4)0.0f;
- float4 gy2 = (float4)0.0f;
- float4 gxgy = (float4)0.0f;
-
- /* Row0 */
- VEC_DATA_TYPE(DATA_TYPE, 8)
- temp_gx = vload8(0, (__global DATA_TYPE *)offset(&src_gx, -1, -1));
- VEC_DATA_TYPE(DATA_TYPE, 8)
- temp_gy = vload8(0, (__global DATA_TYPE *)offset(&src_gy, -1, -1));
-
- float4 l_gx = convert_float4(temp_gx.s0123);
- float4 m_gx = convert_float4(temp_gx.s1234);
- float4 r_gx = convert_float4(temp_gx.s2345);
-
- float4 l_gy = convert_float4(temp_gy.s0123);
- float4 m_gy = convert_float4(temp_gy.s1234);
- float4 r_gy = convert_float4(temp_gy.s2345);
-
- gx2 += (l_gx * l_gx) + (m_gx * m_gx) + (r_gx * r_gx);
- gy2 += (l_gy * l_gy) + (m_gy * m_gy) + (r_gy * r_gy);
- gxgy += (l_gx * l_gy) + (m_gx * m_gy) + (r_gx * r_gy);
-
- /* Row1 */
- temp_gx = vload8(0, (__global DATA_TYPE *)offset(&src_gx, -1, 0));
- temp_gy = vload8(0, (__global DATA_TYPE *)offset(&src_gy, -1, 0));
-
- l_gx = convert_float4(temp_gx.s0123);
- m_gx = convert_float4(temp_gx.s1234);
- r_gx = convert_float4(temp_gx.s2345);
-
- l_gy = convert_float4(temp_gy.s0123);
- m_gy = convert_float4(temp_gy.s1234);
- r_gy = convert_float4(temp_gy.s2345);
-
- gx2 += (l_gx * l_gx) + (m_gx * m_gx) + (r_gx * r_gx);
- gy2 += (l_gy * l_gy) + (m_gy * m_gy) + (r_gy * r_gy);
- gxgy += (l_gx * l_gy) + (m_gx * m_gy) + (r_gx * r_gy);
-
- /* Row2 */
- temp_gx = vload8(0, (__global DATA_TYPE *)offset(&src_gx, -1, 1));
- temp_gy = vload8(0, (__global DATA_TYPE *)offset(&src_gy, -1, 1));
-
- l_gx = convert_float4(temp_gx.s0123);
- m_gx = convert_float4(temp_gx.s1234);
- r_gx = convert_float4(temp_gx.s2345);
-
- l_gy = convert_float4(temp_gy.s0123);
- m_gy = convert_float4(temp_gy.s1234);
- r_gy = convert_float4(temp_gy.s2345);
-
- gx2 += (l_gx * l_gx) + (m_gx * m_gx) + (r_gx * r_gx);
- gy2 += (l_gy * l_gy) + (m_gy * m_gy) + (r_gy * r_gy);
- gxgy += (l_gx * l_gy) + (m_gx * m_gy) + (r_gx * r_gy);
-
- /* Compute trace and determinant */
- float4 trace = gx2 + gy2;
- float4 det = gx2 * gy2 - (gxgy * gxgy);
-
- /* Compute harris score */
- float4 mc = (det - (sensitivity * (trace * trace))) * pow4_normalization_factor;
-
- mc = select(0.0f, mc, mc > (float4)strength_thresh);
-
- vstore4(mc, 0, (__global float *)vc.ptr);
-}
-
-/** Function for calculating harris score 1x5.
- *
- * @param[in] src_gx Pointer to gx gradient image.
- * @param[in] src_gy Pointer to gy gradient image.
- * @param[in] row Relative row.
- */
-inline float16 harris_score_1x5(Image *src_gx, Image *src_gy, int row)
-{
- float4 gx2 = 0.0f;
- float4 gy2 = 0.0f;
- float4 gxgy = 0.0f;
-
- /* Row */
- VEC_DATA_TYPE(DATA_TYPE, 8)
- temp_gx = vload8(0, (__global DATA_TYPE *)offset(src_gx, -2, row));
- VEC_DATA_TYPE(DATA_TYPE, 8)
- temp_gy = vload8(0, (__global DATA_TYPE *)offset(src_gy, -2, row));
-
- float4 gx = convert_float4(temp_gx.s0123);
- float4 gy = convert_float4(temp_gy.s0123);
- gx2 += (gx * gx);
- gy2 += (gy * gy);
- gxgy += (gx * gy);
-
- gx = convert_float4(temp_gx.s1234);
- gy = convert_float4(temp_gy.s1234);
- gx2 += (gx * gx);
- gy2 += (gy * gy);
- gxgy += (gx * gy);
-
- gx = convert_float4(temp_gx.s2345);
- gy = convert_float4(temp_gy.s2345);
- gx2 += (gx * gx);
- gy2 += (gy * gy);
- gxgy += (gx * gy);
-
- gx = convert_float4(temp_gx.s3456);
- gy = convert_float4(temp_gy.s3456);
- gx2 += (gx * gx);
- gy2 += (gy * gy);
- gxgy += (gx * gy);
-
- gx = convert_float4(temp_gx.s4567);
- gy = convert_float4(temp_gy.s4567);
- gx2 += (gx * gx);
- gy2 += (gy * gy);
- gxgy += (gx * gy);
-
- return (float16)(gx2, gy2, gxgy, (float4)0);
-}
-
-/** Function running harris score on 5x5 block size
- *
- * @attention: The input data type should be passed using a compile option -DDATA_TYPE. Supported types: short and int.
- * e.g. -DDATA_TYPE=short.
- *
- * @param[in] src_gx_ptr Pointer to the first source image. Supported data types: S16, S32
- * @param[in] src_gx_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] src_gx_step_x src_gx_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_gx_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] src_gx_step_y src_gx_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_gx_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[in] src_gy_ptr Pointer to the second source image. Supported data types: S16, S32
- * @param[in] src_gy_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] src_gy_step_x src_gy_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_gy_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] src_gy_step_y src_gy_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_gy_offset_first_element_in_bytes The offset of the first element in the destination image
- * @param[out] vc_ptr Pointer to the destination image. Supported data types: F32
- * @param[in] vc_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] vc_step_x vc_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] vc_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] vc_step_y vc_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] vc_offset_first_element_in_bytes The offset of the first element in the destination image
- * @param[in] sensitivity Sensitivity threshold k from the Harris-Stephens equation
- * @param[in] strength_thresh Minimum threshold with which to eliminate Harris Corner scores
- * @param[in] pow4_normalization_factor Normalization factor to apply harris score
- */
-__kernel void harris_score_5x5(
- IMAGE_DECLARATION(src_gx),
- IMAGE_DECLARATION(src_gy),
- IMAGE_DECLARATION(vc),
- float sensitivity,
- float strength_thresh,
- float pow4_normalization_factor)
-{
- Image src_gx = CONVERT_TO_IMAGE_STRUCT(src_gx);
- Image src_gy = CONVERT_TO_IMAGE_STRUCT(src_gy);
- Image vc = CONVERT_TO_IMAGE_STRUCT(vc);
-
- /* Gx^2, Gy^2 and Gx*Gy */
- float16 res = (float16)0.0f;
-
- /* Compute row */
- for(int i = -2; i < 3; i++)
- {
- res += harris_score_1x5(&src_gx, &src_gy, i);
- }
-
- float4 gx2 = res.s0123;
- float4 gy2 = res.s4567;
- float4 gxgy = res.s89AB;
-
- /* Compute trace and determinant */
- float4 trace = gx2 + gy2;
- float4 det = gx2 * gy2 - (gxgy * gxgy);
-
- /* Compute harris score */
- float4 mc = (det - (sensitivity * (trace * trace))) * pow4_normalization_factor;
-
- mc = select(0.0f, mc, mc > (float4)strength_thresh);
-
- vstore4(mc, 0, (__global float *)vc.ptr);
-}
-
-/** Function for calculating harris score 1x7.
- *
- * @param[in] src_gx Pointer to gx gradient image.
- * @param[in] src_gy Pointer to gy gradient image.
- * @param[in] row Relative row.
- */
-inline float16 harris_score_1x7(Image *src_gx, Image *src_gy, int row)
-{
- float4 gx2 = 0.0f;
- float4 gy2 = 0.0f;
- float4 gxgy = 0.0f;
-
- /* Row */
- VEC_DATA_TYPE(DATA_TYPE, 8)
- temp_gx0 = vload8(0, (__global DATA_TYPE *)offset(src_gx, -3, row));
- VEC_DATA_TYPE(DATA_TYPE, 8)
- temp_gy0 = vload8(0, (__global DATA_TYPE *)offset(src_gy, -3, row));
- VEC_DATA_TYPE(DATA_TYPE, 2)
- temp_gx1 = vload2(0, (__global DATA_TYPE *)offset(src_gx, 5, row));
- VEC_DATA_TYPE(DATA_TYPE, 2)
- temp_gy1 = vload2(0, (__global DATA_TYPE *)offset(src_gy, 5, row));
-
- float4 gx = convert_float4(temp_gx0.s0123);
- float4 gy = convert_float4(temp_gy0.s0123);
- gx2 += (gx * gx);
- gy2 += (gy * gy);
- gxgy += (gx * gy);
-
- gx = convert_float4(temp_gx0.s1234);
- gy = convert_float4(temp_gy0.s1234);
- gx2 += (gx * gx);
- gy2 += (gy * gy);
- gxgy += (gx * gy);
-
- gx = convert_float4(temp_gx0.s2345);
- gy = convert_float4(temp_gy0.s2345);
- gx2 += (gx * gx);
- gy2 += (gy * gy);
- gxgy += (gx * gy);
-
- gx = convert_float4(temp_gx0.s3456);
- gy = convert_float4(temp_gy0.s3456);
- gx2 += (gx * gx);
- gy2 += (gy * gy);
- gxgy += (gx * gy);
-
- gx = convert_float4(temp_gx0.s4567);
- gy = convert_float4(temp_gy0.s4567);
- gx2 += (gx * gx);
- gy2 += (gy * gy);
- gxgy += (gx * gy);
-
- gx = convert_float4((VEC_DATA_TYPE(DATA_TYPE, 4))(temp_gx0.s567, temp_gx1.s0));
- gy = convert_float4((VEC_DATA_TYPE(DATA_TYPE, 4))(temp_gy0.s567, temp_gy1.s0));
- gx2 += (gx * gx);
- gy2 += (gy * gy);
- gxgy += (gx * gy);
-
- gx = convert_float4((VEC_DATA_TYPE(DATA_TYPE, 4))(temp_gx0.s67, temp_gx1.s01));
- gy = convert_float4((VEC_DATA_TYPE(DATA_TYPE, 4))(temp_gy0.s67, temp_gy1.s01));
- gx2 += (gx * gx);
- gy2 += (gy * gy);
- gxgy += (gx * gy);
-
- return (float16)(gx2, gy2, gxgy, (float4)0);
-}
-
-/** Function running harris score on 7x7 block size
- *
- * @attention: The input data type should be passed using a compile option -DDATA_TYPE. Supported types: short and int.
- * e.g. -DDATA_TYPE=short.
- *
- * @param[in] src_gx_ptr Pointer to the first source image. Supported data types: S16, S32
- * @param[in] src_gx_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] src_gx_step_x src_gx_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_gx_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] src_gx_step_y src_gx_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_gx_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[in] src_gy_ptr Pointer to the second source image. Supported data types: S16, S32
- * @param[in] src_gy_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] src_gy_step_x src_gy_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_gy_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] src_gy_step_y src_gy_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_gy_offset_first_element_in_bytes The offset of the first element in the destination image
- * @param[out] vc_ptr Pointer to the destination image. Supported data types: F32
- * @param[in] vc_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] vc_step_x vc_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] vc_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] vc_step_y vc_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] vc_offset_first_element_in_bytes The offset of the first element in the destination image
- * @param[in] sensitivity Sensitivity threshold k from the Harris-Stephens equation
- * @param[in] strength_thresh Minimum threshold with which to eliminate Harris Corner scores
- * @param[in] pow4_normalization_factor Normalization factor to apply harris score
- */
-__kernel void harris_score_7x7(
- IMAGE_DECLARATION(src_gx),
- IMAGE_DECLARATION(src_gy),
- IMAGE_DECLARATION(vc),
- float sensitivity,
- float strength_thresh,
- float pow4_normalization_factor)
-{
- Image src_gx = CONVERT_TO_IMAGE_STRUCT(src_gx);
- Image src_gy = CONVERT_TO_IMAGE_STRUCT(src_gy);
- Image vc = CONVERT_TO_IMAGE_STRUCT(vc);
-
- /* Gx^2, Gy^2 and Gx*Gy */
- float16 res = (float16)0.0f;
-
- /* Compute row */
- for(int i = -3; i < 4; i++)
- {
- res += harris_score_1x7(&src_gx, &src_gy, i);
- }
-
- float4 gx2 = res.s0123;
- float4 gy2 = res.s4567;
- float4 gxgy = res.s89AB;
-
- /* Compute trace and determinant */
- float4 trace = gx2 + gy2;
- float4 det = gx2 * gy2 - (gxgy * gxgy);
-
- /* Compute harris score */
- float4 mc = (det - (sensitivity * (trace * trace))) * pow4_normalization_factor;
-
- mc = select(0.0f, mc, mc > (float4)strength_thresh);
-
- vstore4(mc, 0, (__global float *)vc.ptr);
-}
diff --git a/src/core/CL/cl_kernels/histogram.cl b/src/core/CL/cl_kernels/histogram.cl
deleted file mode 100644
index a93cb4d1c7..0000000000
--- a/src/core/CL/cl_kernels/histogram.cl
+++ /dev/null
@@ -1,243 +0,0 @@
-/*
- * Copyright (c) 2016, 2017 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "helpers.h"
-
-#define VATOMIC_INC16(histogram, win_pos) \
- { \
- atomic_inc(histogram + win_pos.s0); \
- atomic_inc(histogram + win_pos.s1); \
- atomic_inc(histogram + win_pos.s2); \
- atomic_inc(histogram + win_pos.s3); \
- atomic_inc(histogram + win_pos.s4); \
- atomic_inc(histogram + win_pos.s5); \
- atomic_inc(histogram + win_pos.s6); \
- atomic_inc(histogram + win_pos.s7); \
- atomic_inc(histogram + win_pos.s8); \
- atomic_inc(histogram + win_pos.s9); \
- atomic_inc(histogram + win_pos.sa); \
- atomic_inc(histogram + win_pos.sb); \
- atomic_inc(histogram + win_pos.sc); \
- atomic_inc(histogram + win_pos.sd); \
- atomic_inc(histogram + win_pos.se); \
- atomic_inc(histogram + win_pos.sf); \
- }
-
-/** Calculate the histogram of an 8 bit grayscale image.
- *
- * Each thread will process 16 pixels and use one local atomic operation per pixel.
- * When all work items in a work group are done the resulting local histograms are
- * added to the global histogram using global atomics.
- *
- * @note The input image is represented as a two-dimensional array of type uchar.
- * The output is represented as a one-dimensional uint array of length of num_bins
- *
- * @param[in] input_ptr Pointer to the first source image. Supported data types: U8
- * @param[in] input_stride_x Stride of the first source image in X dimension (in bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the first source image in Y dimension (in bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the first source image
- * @param[in] histogram_local The local buffer to hold histogram result in per workgroup. Supported data types: U32
- * @param[out] histogram The output buffer to hold histogram final result. Supported data types: U32
- * @param[out] num_bins The number of bins
- * @param[out] offset The start of values to use (inclusive)
- * @param[out] range The range of a bin
- * @param[out] offrange The maximum value (exclusive)
- */
-__kernel void hist_local_kernel(IMAGE_DECLARATION(input),
- __local uint *histogram_local,
- __global uint *restrict histogram,
- uint num_bins,
- uint offset,
- uint range,
- uint offrange)
-{
- Image input_buffer = CONVERT_TO_IMAGE_STRUCT(input);
- uint local_id_x = get_local_id(0);
-
- uint local_x_size = get_local_size(0);
-
- if(num_bins > local_x_size)
- {
- for(int i = local_id_x; i < num_bins; i += local_x_size)
- {
- histogram_local[i] = 0;
- }
- }
- else
- {
- if(local_id_x <= num_bins)
- {
- histogram_local[local_id_x] = 0;
- }
- }
-
- uint16 vals = convert_uint16(vload16(0, input_buffer.ptr));
-
- uint16 win_pos = select(num_bins, ((vals - offset) * num_bins) / range, (vals >= offset && vals < offrange));
-
- barrier(CLK_LOCAL_MEM_FENCE);
- VATOMIC_INC16(histogram_local, win_pos);
- barrier(CLK_LOCAL_MEM_FENCE);
-
- if(num_bins > local_x_size)
- {
- for(int i = local_id_x; i < num_bins; i += local_x_size)
- {
- atomic_add(histogram + i, histogram_local[i]);
- }
- }
- else
- {
- if(local_id_x <= num_bins)
- {
- atomic_add(histogram + local_id_x, histogram_local[local_id_x]);
- }
- }
-}
-
-/** Calculate the histogram of an 8 bit grayscale image's border.
- *
- * Each thread will process one pixel using global atomic.
- * When all work items in a work group are done the resulting local histograms are
- * added to the global histogram using global atomics.
- *
- * @note The input image is represented as a two-dimensional array of type uchar.
- * The output is represented as a one-dimensional uint array of length of num_bins
- *
- * @param[in] input_ptr Pointer to the first source image. Supported data types: U8
- * @param[in] input_stride_x Stride of the first source image in X dimension (in bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the first source image in Y dimension (in bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the first source image
- * @param[out] histogram The output buffer to hold histogram final result. Supported data types: U32
- * @param[out] num_bins The number of bins
- * @param[out] offset The start of values to use (inclusive)
- * @param[out] range The range of a bin
- * @param[out] offrange The maximum value (exclusive)
- */
-__kernel void hist_border_kernel(IMAGE_DECLARATION(input),
- __global uint *restrict histogram,
- uint num_bins,
- uint offset,
- uint range,
- uint offrange)
-{
- Image input_buffer = CONVERT_TO_IMAGE_STRUCT(input);
-
- uint val = (uint)(*input_buffer.ptr);
-
- uint win_pos = (val >= offset) ? (((val - offset) * num_bins) / range) : 0;
-
- if(val >= offset && (val < offrange))
- {
- atomic_inc(histogram + win_pos);
- }
-}
-
-/** Calculate the histogram of an 8 bit grayscale image with bin size of 256 and window size of 1.
- *
- * Each thread will process 16 pixels and use one local atomic operation per pixel.
- * When all work items in a work group are done the resulting local histograms are
- * added to the global histogram using global atomics.
- *
- * @note The input image is represented as a two-dimensional array of type uchar.
- * The output is represented as a one-dimensional uint array of 256 elements
- *
- * @param[in] input_ptr Pointer to the first source image. Supported data types: U8
- * @param[in] input_stride_x Stride of the first source image in X dimension (in bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the first source image in Y dimension (in bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the first source image
- * @param[in] histogram_local The local buffer to hold histogram result in per workgroup. Supported data types: U32
- * @param[out] histogram The output buffer to hold histogram final result. Supported data types: U32
- */
-__kernel void hist_local_kernel_fixed(IMAGE_DECLARATION(input),
- __local uint *histogram_local,
- __global uint *restrict histogram)
-{
- Image input_buffer = CONVERT_TO_IMAGE_STRUCT(input);
-
- uint local_index = get_local_id(0);
- uint local_x_size = get_local_size(0);
-
- for(int i = local_index; i < 256; i += local_x_size)
- {
- histogram_local[i] = 0;
- }
-
- uint16 vals = convert_uint16(vload16(0, input_buffer.ptr));
-
- barrier(CLK_LOCAL_MEM_FENCE);
-
- atomic_inc(histogram_local + vals.s0);
- atomic_inc(histogram_local + vals.s1);
- atomic_inc(histogram_local + vals.s2);
- atomic_inc(histogram_local + vals.s3);
- atomic_inc(histogram_local + vals.s4);
- atomic_inc(histogram_local + vals.s5);
- atomic_inc(histogram_local + vals.s6);
- atomic_inc(histogram_local + vals.s7);
- atomic_inc(histogram_local + vals.s8);
- atomic_inc(histogram_local + vals.s9);
- atomic_inc(histogram_local + vals.sa);
- atomic_inc(histogram_local + vals.sb);
- atomic_inc(histogram_local + vals.sc);
- atomic_inc(histogram_local + vals.sd);
- atomic_inc(histogram_local + vals.se);
- atomic_inc(histogram_local + vals.sf);
-
- barrier(CLK_LOCAL_MEM_FENCE);
-
- for(int i = local_index; i < 256; i += local_x_size)
- {
- atomic_add(histogram + i, histogram_local[i]);
- }
-}
-
-/** Calculate the histogram of an 8 bit grayscale image with bin size as 256 and window size as 1.
- *
- * Each thread will process one pixel using global atomic.
- * When all work items in a work group are done the resulting local histograms are
- * added to the global histogram using global atomics.
- *
- * @note The input image is represented as a two-dimensional array of type uchar.
- * The output is represented as a one-dimensional uint array of 256
- *
- * @param[in] input_ptr Pointer to the first source image. Supported data types: U8
- * @param[in] input_stride_x Stride of the first source image in X dimension (in bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the first source image in Y dimension (in bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the first source image
- * @param[out] histogram The output buffer to hold histogram final result. Supported data types: U32
- */
-__kernel void hist_border_kernel_fixed(IMAGE_DECLARATION(input),
- __global uint *restrict histogram)
-{
- Image input_buffer = CONVERT_TO_IMAGE_STRUCT(input);
- atomic_inc(histogram + *input_buffer.ptr);
-}
diff --git a/src/core/CL/cl_kernels/hog.cl b/src/core/CL/cl_kernels/hog.cl
deleted file mode 100644
index b14f361df6..0000000000
--- a/src/core/CL/cl_kernels/hog.cl
+++ /dev/null
@@ -1,456 +0,0 @@
-/*
- * Copyright (c) 2017-2018 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "helpers.h"
-#include "types.h"
-
-#if defined(CELL_WIDTH) && defined(CELL_HEIGHT) && defined(NUM_BINS) && defined(PHASE_SCALE)
-
-/** This OpenCL kernel computes the HOG orientation binning
- *
- * @attention The following variables must be passed at compile time:
- *
- * -# -DCELL_WIDTH = Width of the cell
- * -# -DCELL_HEIGHT = height of the cell
- * -# -DNUM_BINS = Number of bins for each cell
- * -# -DPHASE_SCALE = Scale factor used to evaluate the index of the local HOG
- *
- * @note Each work-item computes a single cell
- *
- * @param[in] mag_ptr Pointer to the source image which stores the magnitude of the gradient for each pixel. Supported data types: S16
- * @param[in] mag_stride_x Stride of the magnitude image in X dimension (in bytes)
- * @param[in] mag_step_x mag_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] mag_stride_y Stride of the magnitude image in Y dimension (in bytes)
- * @param[in] mag_step_y mag_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] mag_offset_first_element_in_bytes The offset of the first element in the magnitude image
- * @param[in] phase_ptr Pointer to the source image which stores the phase of the gradient for each pixel. Supported data types: U8
- * @param[in] phase_stride_x Stride of the phase image in X dimension (in bytes)
- * @param[in] phase_step_x phase_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] phase_stride_y Stride of the the phase image in Y dimension (in bytes)
- * @param[in] phase_step_y phase_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] phase_offset_first_element_in_bytes The offset of the first element in the the phase image
- * @param[out] dst_ptr Pointer to the destination image which stores the local HOG for each cell Supported data types: F32. Number of channels supported: equal to the number of histogram bins per cell
- * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void hog_orientation_binning(IMAGE_DECLARATION(mag),
- IMAGE_DECLARATION(phase),
- IMAGE_DECLARATION(dst))
-{
- float bins[NUM_BINS] = { 0 };
-
- // Compute address for the magnitude and phase images
- Image mag = CONVERT_TO_IMAGE_STRUCT(mag);
- Image phase = CONVERT_TO_IMAGE_STRUCT(phase);
-
- __global uchar *mag_row_ptr = mag.ptr;
- __global uchar *phase_row_ptr = phase.ptr;
-
- for(int yc = 0; yc < CELL_HEIGHT; ++yc)
- {
- int xc = 0;
- for(; xc <= (CELL_WIDTH - 4); xc += 4)
- {
- // Load magnitude and phase values
- const float4 mag_f32 = convert_float4(vload4(0, (__global short *)mag_row_ptr + xc));
- float4 phase_f32 = convert_float4(vload4(0, phase_row_ptr + xc));
-
- // Scale phase: phase * scale + 0.5f
- phase_f32 = (float4)0.5f + phase_f32 * (float4)PHASE_SCALE;
-
- // Compute histogram index.
- int4 hidx_s32 = convert_int4(phase_f32);
-
- // Compute magnitude weights (w0 and w1)
- const float4 hidx_f32 = convert_float4(hidx_s32);
-
- // w1 = phase_f32 - hidx_s32
- const float4 w1_f32 = phase_f32 - hidx_f32;
-
- // w0 = 1.0 - w1
- const float4 w0_f32 = (float4)1.0f - w1_f32;
-
- // Calculate the weights for splitting vote
- const float4 mag_w0_f32 = mag_f32 * w0_f32;
- const float4 mag_w1_f32 = mag_f32 * w1_f32;
-
- // Weighted vote between 2 bins
-
- // Check if the histogram index is equal to NUM_BINS. If so, replace the index with 0
- hidx_s32 = select(hidx_s32, (int4)0, hidx_s32 == (int4)(NUM_BINS));
-
- // Bin 0
- bins[hidx_s32.s0] += mag_w0_f32.s0;
- bins[hidx_s32.s1] += mag_w0_f32.s1;
- bins[hidx_s32.s2] += mag_w0_f32.s2;
- bins[hidx_s32.s3] += mag_w0_f32.s3;
-
- hidx_s32 += (int4)1;
-
- // Check if the histogram index is equal to NUM_BINS. If so, replace the index with 0
- hidx_s32 = select(hidx_s32, (int4)0, hidx_s32 == (int4)(NUM_BINS));
-
- // Bin1
- bins[hidx_s32.s0] += mag_w1_f32.s0;
- bins[hidx_s32.s1] += mag_w1_f32.s1;
- bins[hidx_s32.s2] += mag_w1_f32.s2;
- bins[hidx_s32.s3] += mag_w1_f32.s3;
- }
-
- // Left over computation
- for(; xc < CELL_WIDTH; xc++)
- {
- const float mag_value = *((__global short *)mag_row_ptr + xc);
- const float phase_value = *(phase_row_ptr + xc) * (float)PHASE_SCALE + 0.5f;
- const float w1 = phase_value - floor(phase_value);
-
- // The quantised phase is the histogram index [0, NUM_BINS - 1]
- // Check limit of histogram index. If hidx == NUM_BINS, hidx = 0
- const uint hidx = (uint)(phase_value) % NUM_BINS;
-
- // Weighted vote between 2 bins
- bins[hidx] += mag_value * (1.0f - w1);
- bins[(hidx + 1) % NUM_BINS] += mag_value * w1;
- }
-
- // Point to the next row of magnitude and phase images
- mag_row_ptr += mag_stride_y;
- phase_row_ptr += phase_stride_y;
- }
-
- // Compute address for the destination image
- Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
-
- // Store the local HOG in the global memory
- int xc = 0;
- for(; xc <= (NUM_BINS - 4); xc += 4)
- {
- float4 values = vload4(0, bins + xc);
-
- vstore4(values, 0, ((__global float *)dst.ptr) + xc);
- }
-
- // Left over stores
- for(; xc < NUM_BINS; ++xc)
- {
- ((__global float *)dst.ptr)[xc] = bins[xc];
- }
-}
-#endif /* CELL_WIDTH and CELL_HEIGHT and NUM_BINS and PHASE_SCALE */
-
-#if defined(NUM_CELLS_PER_BLOCK_HEIGHT) && defined(NUM_BINS_PER_BLOCK_X) && defined(NUM_BINS_PER_BLOCK) && defined(HOG_NORM_TYPE) && defined(L2_HYST_THRESHOLD)
-
-#ifndef L2_NORM
-#error The value of enum class HOGNormType::L2_NORM has not be passed to the OpenCL kernel
-#endif /* not L2_NORM */
-
-#ifndef L2HYS_NORM
-#error The value of enum class HOGNormType::L2HYS_NORM has not be passed to the OpenCL kernel
-#endif /* not L2HYS_NORM */
-
-#ifndef L1_NORM
-#error The value of enum class HOGNormType::L1_NORM has not be passed to the OpenCL kernel
-#endif /* not L1_NORM */
-
-/** This OpenCL kernel computes the HOG block normalization
- *
- * @attention The following variables must be passed at compile time:
- *
- * -# -DNUM_CELLS_PER_BLOCK_HEIGHT = Number of cells for each block
- * -# -DNUM_BINS_PER_BLOCK_X = Number of bins for each block along the X direction
- * -# -DNUM_BINS_PER_BLOCK = Number of bins for each block
- * -# -DHOG_NORM_TYPE = Normalization type
- * -# -DL2_HYST_THRESHOLD = Threshold used for L2HYS_NORM normalization method
- * -# -DL2_NORM = Value of the enum class HOGNormType::L2_NORM
- * -# -DL2HYS_NORM = Value of the enum class HOGNormType::L2HYS_NORM
- * -# -DL1_NORM = Value of the enum class HOGNormType::L1_NORM
- *
- * @note Each work-item computes a single block
- *
- * @param[in] src_ptr Pointer to the source image which stores the local HOG. Supported data types: F32. Number of channels supported: equal to the number of histogram bins per cell
- * @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] dst_ptr Pointer to the destination image which stores the normlized HOG Supported data types: F32. Number of channels supported: equal to the number of histogram bins per block
- * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void hog_block_normalization(IMAGE_DECLARATION(src),
- IMAGE_DECLARATION(dst))
-{
- float sum = 0.0f;
- float4 sum_f32 = (float4)(0.0f);
-
- // Compute address for the source and destination tensor
- Image src = CONVERT_TO_IMAGE_STRUCT(src);
- Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
-
- for(size_t yc = 0; yc < NUM_CELLS_PER_BLOCK_HEIGHT; ++yc)
- {
- const __global float *hist_ptr = (__global float *)(src.ptr + yc * src_stride_y);
-
- int xc = 0;
- for(; xc <= (NUM_BINS_PER_BLOCK_X - 16); xc += 16)
- {
- const float4 val0 = vload4(0, hist_ptr + xc + 0);
- const float4 val1 = vload4(0, hist_ptr + xc + 4);
- const float4 val2 = vload4(0, hist_ptr + xc + 8);
- const float4 val3 = vload4(0, hist_ptr + xc + 12);
-
-#if(HOG_NORM_TYPE == L2_NORM) || (HOG_NORM_TYPE == L2HYS_NORM)
- // Compute val^2 for L2_NORM or L2HYS_NORM
- sum_f32 += val0 * val0;
- sum_f32 += val1 * val1;
- sum_f32 += val2 * val2;
- sum_f32 += val3 * val3;
-#else /* (HOG_NORM_TYPE == L2_NORM) || (HOG_NORM_TYPE == L2HYS_NORM) */
- // Compute |val| for L1_NORM
- sum_f32 += fabs(val0);
- sum_f32 += fabs(val1);
- sum_f32 += fabs(val2);
- sum_f32 += fabs(val3);
-#endif /* (HOG_NORM_TYPE == L2_NORM) || (HOG_NORM_TYPE == L2HYS_NORM) */
-
- // Store linearly the input values un-normalized in the output image. These values will be reused for the normalization.
- // This approach will help us to be cache friendly in the next for loop where the normalization will be done because all the values
- // will be accessed consecutively
- vstore4(val0, 0, ((__global float *)dst.ptr) + xc + 0 + yc * NUM_BINS_PER_BLOCK_X);
- vstore4(val1, 0, ((__global float *)dst.ptr) + xc + 4 + yc * NUM_BINS_PER_BLOCK_X);
- vstore4(val2, 0, ((__global float *)dst.ptr) + xc + 8 + yc * NUM_BINS_PER_BLOCK_X);
- vstore4(val3, 0, ((__global float *)dst.ptr) + xc + 12 + yc * NUM_BINS_PER_BLOCK_X);
- }
-
- // Compute left over
- for(; xc < NUM_BINS_PER_BLOCK_X; ++xc)
- {
- const float val = hist_ptr[xc];
-
-#if(HOG_NORM_TYPE == L2_NORM) || (HOG_NORM_TYPE == L2HYS_NORM)
- sum += val * val;
-#else /* (HOG_NORM_TYPE == L2_NORM) || (HOG_NORM_TYPE == L2HYS_NORM) */
- sum += fabs(val);
-#endif /* (HOG_NORM_TYPE == L2_NORM) || (HOG_NORM_TYPE == L2HYS_NORM) */
-
- ((__global float *)dst.ptr)[xc + 0 + yc * NUM_BINS_PER_BLOCK_X] = val;
- }
- }
-
- sum += dot(sum_f32, (float4)1.0f);
-
- float scale = 1.0f / (sqrt(sum) + NUM_BINS_PER_BLOCK * 0.1f);
-
-#if(HOG_NORM_TYPE == L2HYS_NORM)
- // Reset sum
- sum_f32 = (float4)0.0f;
- sum = 0.0f;
-
- int k = 0;
- for(; k <= NUM_BINS_PER_BLOCK - 16; k += 16)
- {
- float4 val0 = vload4(0, ((__global float *)dst.ptr) + k + 0);
- float4 val1 = vload4(0, ((__global float *)dst.ptr) + k + 4);
- float4 val2 = vload4(0, ((__global float *)dst.ptr) + k + 8);
- float4 val3 = vload4(0, ((__global float *)dst.ptr) + k + 12);
-
- // Scale val
- val0 = val0 * (float4)scale;
- val1 = val1 * (float4)scale;
- val2 = val2 * (float4)scale;
- val3 = val3 * (float4)scale;
-
- // Clip val if over _threshold_l2hys
- val0 = fmin(val0, (float4)L2_HYST_THRESHOLD);
- val1 = fmin(val1, (float4)L2_HYST_THRESHOLD);
- val2 = fmin(val2, (float4)L2_HYST_THRESHOLD);
- val3 = fmin(val3, (float4)L2_HYST_THRESHOLD);
-
- // Compute val^2
- sum_f32 += val0 * val0;
- sum_f32 += val1 * val1;
- sum_f32 += val2 * val2;
- sum_f32 += val3 * val3;
-
- vstore4(val0, 0, ((__global float *)dst.ptr) + k + 0);
- vstore4(val1, 0, ((__global float *)dst.ptr) + k + 4);
- vstore4(val2, 0, ((__global float *)dst.ptr) + k + 8);
- vstore4(val3, 0, ((__global float *)dst.ptr) + k + 12);
- }
-
- // Compute left over
- for(; k < NUM_BINS_PER_BLOCK; ++k)
- {
- float val = ((__global float *)dst.ptr)[k] * scale;
-
- // Clip scaled input_value if over L2_HYST_THRESHOLD
- val = fmin(val, (float)L2_HYST_THRESHOLD);
-
- sum += val * val;
-
- ((__global float *)dst.ptr)[k] = val;
- }
-
- sum += dot(sum_f32, (float4)1.0f);
-
- // We use the same constants of OpenCV
- scale = 1.0f / (sqrt(sum) + 1e-3f);
-
-#endif /* (HOG_NORM_TYPE == L2HYS_NORM) */
-
- int i = 0;
- for(; i <= (NUM_BINS_PER_BLOCK - 16); i += 16)
- {
- float4 val0 = vload4(0, ((__global float *)dst.ptr) + i + 0);
- float4 val1 = vload4(0, ((__global float *)dst.ptr) + i + 4);
- float4 val2 = vload4(0, ((__global float *)dst.ptr) + i + 8);
- float4 val3 = vload4(0, ((__global float *)dst.ptr) + i + 12);
-
- // Multiply val by the normalization scale factor
- val0 = val0 * (float4)scale;
- val1 = val1 * (float4)scale;
- val2 = val2 * (float4)scale;
- val3 = val3 * (float4)scale;
-
- vstore4(val0, 0, ((__global float *)dst.ptr) + i + 0);
- vstore4(val1, 0, ((__global float *)dst.ptr) + i + 4);
- vstore4(val2, 0, ((__global float *)dst.ptr) + i + 8);
- vstore4(val3, 0, ((__global float *)dst.ptr) + i + 12);
- }
-
- for(; i < NUM_BINS_PER_BLOCK; ++i)
- {
- ((__global float *)dst.ptr)[i] *= scale;
- }
-}
-#endif /* NUM_CELLS_PER_BLOCK_HEIGHT and NUM_BINS_PER_BLOCK_X and NUM_BINS_PER_BLOCK and HOG_NORM_TYPE and L2_HYST_THRESHOLD */
-
-#if defined(NUM_BLOCKS_PER_DESCRIPTOR_Y) && defined(NUM_BINS_PER_DESCRIPTOR_X) && defined(THRESHOLD) && defined(MAX_NUM_DETECTION_WINDOWS) && defined(IDX_CLASS) && defined(DETECTION_WINDOW_STRIDE_WIDTH) && defined(DETECTION_WINDOW_STRIDE_HEIGHT) && defined(DETECTION_WINDOW_WIDTH) && defined(DETECTION_WINDOW_HEIGHT)
-
-/** This OpenCL kernel computes the HOG detector using linear SVM
- *
- * @attention The following variables must be passed at compile time:
- *
- * -# -DNUM_BLOCKS_PER_DESCRIPTOR_Y = Number of blocks per descriptor along the Y direction
- * -# -DNUM_BINS_PER_DESCRIPTOR_X = Number of bins per descriptor along the X direction
- * -# -DTHRESHOLD = Threshold for the distance between features and SVM classifying plane
- * -# -DMAX_NUM_DETECTION_WINDOWS = Maximum number of possible detection windows. It is equal to the size of the DetectioWindow array
- * -# -DIDX_CLASS = Index of the class to detect
- * -# -DDETECTION_WINDOW_STRIDE_WIDTH = Detection window stride for the X direction
- * -# -DDETECTION_WINDOW_STRIDE_HEIGHT = Detection window stride for the Y direction
- * -# -DDETECTION_WINDOW_WIDTH = Width of the detection window
- * -# -DDETECTION_WINDOW_HEIGHT = Height of the detection window
- *
- * @note Each work-item computes a single detection window
- *
- * @param[in] src_ptr Pointer to the source image which stores the local HOG. Supported data types: F32. Number of channels supported: equal to the number of histogram bins per cell
- * @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[in] hog_descriptor Pointer to HOG descriptor. Supported data types: F32
- * @param[out] dst Pointer to DetectionWindow array
- * @param[out] num_detection_windows Number of objects detected
- */
-__kernel void hog_detector(IMAGE_DECLARATION(src),
- __global float *hog_descriptor,
- __global DetectionWindow *dst,
- __global uint *num_detection_windows)
-{
- // Check if the DetectionWindow array is full
- if(*num_detection_windows >= MAX_NUM_DETECTION_WINDOWS)
- {
- return;
- }
-
- Image src = CONVERT_TO_IMAGE_STRUCT(src);
-
- const int src_step_y_f32 = src_stride_y / sizeof(float);
-
- // Init score_f32 with 0
- float4 score_f32 = (float4)0.0f;
-
- // Init score with 0
- float score = 0.0f;
-
- __global float *src_row_ptr = (__global float *)src.ptr;
-
- // Compute Linear SVM
- for(int yb = 0; yb < NUM_BLOCKS_PER_DESCRIPTOR_Y; ++yb, src_row_ptr += src_step_y_f32)
- {
- int xb = 0;
-
- const int offset_y = yb * NUM_BINS_PER_DESCRIPTOR_X;
-
- for(; xb < (int)NUM_BINS_PER_DESCRIPTOR_X - 8; xb += 8)
- {
- // Load descriptor values
- float4 a0_f32 = vload4(0, src_row_ptr + xb + 0);
- float4 a1_f32 = vload4(0, src_row_ptr + xb + 4);
-
- float4 b0_f32 = vload4(0, hog_descriptor + xb + 0 + offset_y);
- float4 b1_f32 = vload4(0, hog_descriptor + xb + 4 + offset_y);
-
- // Multiply accumulate
- score_f32 += a0_f32 * b0_f32;
- score_f32 += a1_f32 * b1_f32;
- }
-
- for(; xb < NUM_BINS_PER_DESCRIPTOR_X; ++xb)
- {
- const float a = src_row_ptr[xb];
- const float b = hog_descriptor[xb + offset_y];
-
- score += a * b;
- }
- }
-
- score += dot(score_f32, (float4)1.0f);
-
- // Add the bias. The bias is located at the position (descriptor_size() - 1)
- // (descriptor_size - 1) = NUM_BINS_PER_DESCRIPTOR_X * NUM_BLOCKS_PER_DESCRIPTOR_Y
- score += hog_descriptor[NUM_BINS_PER_DESCRIPTOR_X * NUM_BLOCKS_PER_DESCRIPTOR_Y];
-
- if(score > (float)THRESHOLD)
- {
- int id = atomic_inc(num_detection_windows);
- if(id < MAX_NUM_DETECTION_WINDOWS)
- {
- dst[id].x = get_global_id(0) * DETECTION_WINDOW_STRIDE_WIDTH;
- dst[id].y = get_global_id(1) * DETECTION_WINDOW_STRIDE_HEIGHT;
- dst[id].width = DETECTION_WINDOW_WIDTH;
- dst[id].height = DETECTION_WINDOW_HEIGHT;
- dst[id].idx_class = IDX_CLASS;
- dst[id].score = score;
- }
- }
-}
-#endif /* NUM_BLOCKS_PER_DESCRIPTOR_Y && NUM_BINS_PER_DESCRIPTOR_X && THRESHOLD && MAX_NUM_DETECTION_WINDOWS && IDX_CLASS &&
- * DETECTION_WINDOW_STRIDE_WIDTH && DETECTION_WINDOW_STRIDE_HEIGHT && DETECTION_WINDOW_WIDTH && DETECTION_WINDOW_HEIGHT */
diff --git a/src/core/CL/cl_kernels/integral_image.cl b/src/core/CL/cl_kernels/integral_image.cl
deleted file mode 100644
index dd2c7982f4..0000000000
--- a/src/core/CL/cl_kernels/integral_image.cl
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Copyright (c) 2016, 2017 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "helpers.h"
-
-/** This function computes the horizontal integral of the image.
- *
- * @param[in] src_ptr Pointer to the source image. Supported data types: U8
- * @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] dst_ptr Pointer to the destination image. Supported data types: U32
- * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void integral_horizontal(
- IMAGE_DECLARATION(src),
- IMAGE_DECLARATION(dst))
-{
- Image src = CONVERT_TO_IMAGE_STRUCT(src);
- Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
-
- uint prev = 0;
-
- for(uint j = 0; j < src_step_x; j += 16)
- {
- barrier(CLK_GLOBAL_MEM_FENCE);
- uint16 res = convert_uint16(vload16(0, offset(&src, j, 0)));
- res.s0 += prev;
- res.s1 += res.s0;
- res.s2 += res.s1;
- res.s3 += res.s2;
- res.s4 += res.s3;
- res.s5 += res.s4;
- res.s6 += res.s5;
- res.s7 += res.s6;
- res.s8 += res.s7;
- res.s9 += res.s8;
- res.sA += res.s9;
- res.sB += res.sA;
- res.sC += res.sB;
- res.sD += res.sC;
- res.sE += res.sD;
- res.sF += res.sE;
- prev = res.sF;
- vstore16(res, 0, (__global uint *)offset(&dst, j, 0));
- }
-}
-
-/** This function computes the vertical integral of the image.
- *
- * @param[in,out] src_ptr Pointer to the source image. Supported data types: U32
- * @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[in] height Image height.
- */
-__kernel void integral_vertical(
- IMAGE_DECLARATION(src),
- uint height)
-{
- Image src = CONVERT_TO_IMAGE_STRUCT(src);
-
- uint8 prev = vload8(0, (__global uint *)offset(&src, 0, 0));
- for(uint j = 1; j < height; ++j)
- {
- barrier(CLK_GLOBAL_MEM_FENCE);
- uint8 res = vload8(0, (__global uint *)offset(&src, 0, j));
- res += prev;
- vstore8(res, 0, (__global uint *)offset(&src, 0, j));
- prev = res;
- }
-}
diff --git a/src/core/CL/cl_kernels/magnitude_phase.cl b/src/core/CL/cl_kernels/magnitude_phase.cl
deleted file mode 100644
index 48197d6473..0000000000
--- a/src/core/CL/cl_kernels/magnitude_phase.cl
+++ /dev/null
@@ -1,162 +0,0 @@
-/*
- * Copyright (c) 2016, 2017 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "helpers.h"
-
-/** Calculates L1 normalization between two inputs.
- *
- * @param[in] a First input. Supported data types: S16, S32
- * @param[in] b Second input. Supported data types: S16, S32
- *
- * @return L1 normalization magnitude result. Supported data types: S16, S32
- */
-inline VEC_DATA_TYPE(DATA_TYPE, 16) magnitude_l1(VEC_DATA_TYPE(DATA_TYPE, 16) a, VEC_DATA_TYPE(DATA_TYPE, 16) b)
-{
- return CONVERT_SAT(add_sat(abs(a), abs(b)), VEC_DATA_TYPE(DATA_TYPE, 16));
-}
-
-/** Calculates L2 normalization between two inputs.
- *
- * @param[in] a First input. Supported data types: S16, S32
- * @param[in] b Second input. Supported data types: S16, S32
- *
- * @return L2 normalization magnitude result. Supported data types: S16, S32
- */
-inline VEC_DATA_TYPE(DATA_TYPE, 16) magnitude_l2(int16 a, int16 b)
-{
- return CONVERT_SAT((sqrt(convert_float16((convert_uint16(a * a) + convert_uint16(b * b)))) + 0.5f),
- VEC_DATA_TYPE(DATA_TYPE, 16));
-}
-
-/** Calculates unsigned phase between two inputs.
- *
- * @param[in] a First input. Supported data types: S16, S32
- * @param[in] b Second input. Supported data types: S16, S32
- *
- * @return Unsigned phase mapped in the interval [0, 180]. Supported data types: U8
- */
-inline uchar16 phase_unsigned(VEC_DATA_TYPE(DATA_TYPE, 16) a, VEC_DATA_TYPE(DATA_TYPE, 16) b)
-{
- float16 angle_deg_f32 = atan2pi(convert_float16(b), convert_float16(a)) * (float16)180.0f;
- angle_deg_f32 = select(angle_deg_f32, (float16)180.0f + angle_deg_f32, angle_deg_f32 < (float16)0.0f);
- return convert_uchar16(angle_deg_f32);
-}
-
-/** Calculates signed phase between two inputs.
- *
- * @param[in] a First input. Supported data types: S16, S32
- * @param[in] b Second input. Supported data types: S16, S32
- *
- * @return Signed phase mapped in the interval [0, 256). Supported data types: U8
- */
-inline uchar16 phase_signed(VEC_DATA_TYPE(DATA_TYPE, 16) a, VEC_DATA_TYPE(DATA_TYPE, 16) b)
-{
- float16 arct = atan2pi(convert_float16(b), convert_float16(a));
- arct = select(arct, arct + 2, arct < 0.0f);
-
- return convert_uchar16(convert_int16(mad(arct, 128, 0.5f)) & (int16)0xFFu);
-}
-
-#if(1 == MAGNITUDE)
-#define MAGNITUDE_OP(x, y) magnitude_l1((x), (y))
-#elif(2 == MAGNITUDE)
-#define MAGNITUDE_OP(x, y) magnitude_l2(convert_int16(x), convert_int16(y))
-#else /* MAGNITUDE */
-#define MAGNITUDE_OP(x, y)
-#endif /* MAGNITUDE */
-
-#if(1 == PHASE)
-#define PHASE_OP(x, y) phase_unsigned((x), (y))
-#elif(2 == PHASE)
-#define PHASE_OP(x, y) phase_signed((x), (y))
-#else /* PHASE */
-#define PHASE_OP(x, y)
-#endif /* PHASE */
-
-/** Calculate the magnitude and phase of given the gradients of an image.
- *
- * @note Magnitude calculation supported: L1 normalization(type = 1) and L2 normalization(type = 2).
- * @note Phase calculation supported: Unsigned(type = 1) [0,128] and Signed(type = 2) [0,256).
- *
- * @attention To enable phase calculation -DPHASE="phase_calculation_type_id" must be provided at compile time. eg -DPHASE=1
- * @attention To enable magnitude calculation -DMAGNITUDE="magnitude_calculation_type_id" must be provided at compile time. eg -DMAGNITUDE=1
- * @attention Datatype of the two inputs is passed at compile time using -DDATA_TYPE. e.g -DDATA_TYPE=short. Supported data_types are: short and int
- *
- * @param[in] gx_ptr Pointer to the first source image (gradient X). Supported data types: S16, S32
- * @param[in] gx_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] gx_step_x gx_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] gx_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] gx_step_y gx_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] gx_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[in] gy_ptr Pointer to the second source image (gradient Y) . Supported data types: S16, S32
- * @param[in] gy_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] gy_step_x gy_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] gy_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] gy_step_y gy_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] gy_offset_first_element_in_bytes The offset of the first element in the destination image
- * @param[out] magnitude_ptr Pointer to the magnitude destination image. Supported data types: S16, S32
- * @param[in] magnitude_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] magnitude_step_x magnitude_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] magnitude_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] magnitude_step_y magnitude_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] magnitude_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] phase_ptr Pointer to the phase destination image. Supported data types: U8
- * @param[in] phase_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] phase_step_x phase_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] phase_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] phase_step_y phase_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] phase_offset_first_element_in_bytes The offset of the first element in the destination image
- * */
-__kernel void magnitude_phase(
- IMAGE_DECLARATION(gx),
- IMAGE_DECLARATION(gy)
-#ifdef MAGNITUDE
- ,
- IMAGE_DECLARATION(magnitude)
-#endif /* MAGNITUDE */
-#ifdef PHASE
- ,
- IMAGE_DECLARATION(phase)
-#endif /* PHASE */
-)
-{
- // Get pixels pointer
- Image gx = CONVERT_TO_IMAGE_STRUCT(gx);
- Image gy = CONVERT_TO_IMAGE_STRUCT(gy);
-
- // Load values
- VEC_DATA_TYPE(DATA_TYPE, 16)
- in_a = vload16(0, (__global DATA_TYPE *)gx.ptr);
- VEC_DATA_TYPE(DATA_TYPE, 16)
- in_b = vload16(0, (__global DATA_TYPE *)gy.ptr);
-
- // Calculate and store the results
-#ifdef MAGNITUDE
- Image magnitude = CONVERT_TO_IMAGE_STRUCT(magnitude);
- vstore16(MAGNITUDE_OP(in_a, in_b), 0, (__global DATA_TYPE *)magnitude.ptr);
-#endif /* MAGNITUDE */
-#ifdef PHASE
- Image phase = CONVERT_TO_IMAGE_STRUCT(phase);
- vstore16(PHASE_OP(in_a, in_b), 0, phase.ptr);
-#endif /* PHASE */
-}
diff --git a/src/core/CL/cl_kernels/mean_stddev.cl b/src/core/CL/cl_kernels/mean_stddev.cl
deleted file mode 100644
index 4ddf931e4b..0000000000
--- a/src/core/CL/cl_kernels/mean_stddev.cl
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Copyright (c) 2016-2018 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "helpers.h"
-
-#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
-
-/** This function calculates the sum and sum of squares of a given input image.
- *
- * @note To enable calculation sum of squares -DSTDDEV should be passed as a preprocessor argument.
- *
- * @param[in] src_ptr Pointer to the source image. Supported data types: U8
- * @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[in] height Height of the input image
- * @param[out] global_sum Global sum of all elements
- * @param[out] global_sum_sq Global sum of squares of all elements
- */
-__kernel void mean_stddev_accumulate(
- IMAGE_DECLARATION(src),
- uint height,
- __global ulong *global_sum
-#ifdef STDDEV
- ,
- __global ulong *global_sum_sq
-#endif /* STDDEV */
-)
-{
- // Get pixels pointer
- Image src = CONVERT_TO_IMAGE_STRUCT(src);
-
- uint8 tmp_sum = 0;
-#ifdef STDDEV
- uint8 tmp_sum_sq = 0;
-#endif /* STDDEV */
- // Calculate partial sum
- for(int i = 0; i < height; i++)
- {
- // Load data
- uint8 data = convert_uint8(vload8(0, offset(&src, 0, i)));
-
- tmp_sum += data;
-#ifdef STDDEV
- tmp_sum_sq += data * data;
-#endif /* STDDEV */
- }
- // Perform reduction
- tmp_sum.s0123 += tmp_sum.s4567;
- tmp_sum.s01 += tmp_sum.s23;
- atom_add(global_sum, tmp_sum.s0 + tmp_sum.s1);
-
-#ifdef STDDEV
- tmp_sum_sq.s0123 += tmp_sum_sq.s4567;
- tmp_sum_sq.s01 += tmp_sum_sq.s23;
- atom_add(global_sum_sq, tmp_sum_sq.s0 + tmp_sum_sq.s1);
-#endif /* STDDEV */
-}
-
-#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : disable
diff --git a/src/core/CL/cl_kernels/minmaxloc.cl b/src/core/CL/cl_kernels/minmaxloc.cl
deleted file mode 100644
index 1045f22fb1..0000000000
--- a/src/core/CL/cl_kernels/minmaxloc.cl
+++ /dev/null
@@ -1,193 +0,0 @@
-/*
- * Copyright (c) 2017 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "helpers.h"
-#include "types.h"
-
-#ifndef DATA_TYPE_MIN
-#define DATA_TYPE_MIN 0x0
-#endif /* DATA_TYPE_MIN */
-
-#ifndef DATA_TYPE_MAX
-#define DATA_TYPE_MAX 0xFF
-#endif /* DATA_TYPE_MAX */
-
-inline int FloatFlip(float val)
-{
- union
- {
- int int_val;
- float flt_val;
- } u_val;
- u_val.flt_val = val;
- return (u_val.int_val >= 0) ? u_val.int_val : u_val.int_val ^ 0x7FFFFFFF;
-}
-
-__constant VEC_DATA_TYPE(DATA_TYPE, 16) type_min = (VEC_DATA_TYPE(DATA_TYPE, 16))(DATA_TYPE_MIN);
-__constant VEC_DATA_TYPE(DATA_TYPE, 16) type_max = (VEC_DATA_TYPE(DATA_TYPE, 16))(DATA_TYPE_MAX);
-__constant int16 idx16 = (int16)(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
-
-/** This function identifies the min and maximum value of an input image.
- *
- * @note Input image data type must be passed as a preprocessor argument using -DDATA_TYPE.
- * Moreover, the minimum and maximum value of the given data type must be provided using -DDATA_TYPE_MIN and -DDATA_TYPE_MAX respectively.
- * @note In case image width is not a multiple of 16 then -DNON_MULTIPLE_OF_16 must be passed.
- *
- * @param[in] src_ptr Pointer to the source image. Supported data types: U8
- * @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] min_max Pointer to buffer with minimum value in position 0 and maximum value in position 1
- * @param[in] width Input image width
- */
-__kernel void minmax(
- IMAGE_DECLARATION(src),
- __global int *min_max,
- int width)
-{
- Image src = CONVERT_TO_IMAGE_STRUCT(src);
-
- // Initialize local minimum and local maximum
- VEC_DATA_TYPE(DATA_TYPE, 16)
- local_min = type_max;
- VEC_DATA_TYPE(DATA_TYPE, 16)
- local_max = type_min;
-
- // Calculate min/max of row
- int i = 0;
- for(; i + 16 <= width; i += 16)
- {
- VEC_DATA_TYPE(DATA_TYPE, 16)
- data = vload16(0, (__global DATA_TYPE *)offset(&src, i, 0));
- local_min = min(data, local_min);
- local_max = max(data, local_max);
- }
-
-#ifdef NON_MULTIPLE_OF_16
- // Handle non multiple of 16
- VEC_DATA_TYPE(DATA_TYPE, 16)
- data = vload16(0, (__global DATA_TYPE *)offset(&src, i, 0));
-#ifdef IS_DATA_TYPE_FLOAT
- int16 valid_indices = (i + idx16) < width;
-#else /* IS_DATA_TYPE_FLOAT */
- VEC_DATA_TYPE(DATA_TYPE, 16)
- valid_indices = CONVERT((i + idx16) < width, VEC_DATA_TYPE(DATA_TYPE, 16));
-#endif /* IS_DATA_TYPE_FLOAT */
- local_max = max(local_max, select(type_min, data, valid_indices));
- local_min = min(local_min, select(type_max, data, valid_indices));
-#endif /* NON_MULTIPLE_OF_16 */
-
- // Perform min/max reduction
- local_min.s01234567 = min(local_min.s01234567, local_min.s89ABCDEF);
- local_max.s01234567 = max(local_max.s01234567, local_max.s89ABCDEF);
-
- local_min.s0123 = min(local_min.s0123, local_min.s4567);
- local_max.s0123 = max(local_max.s0123, local_max.s4567);
-
- local_min.s01 = min(local_min.s01, local_min.s23);
- local_max.s01 = max(local_max.s01, local_max.s23);
-
- local_min.s0 = min(local_min.s0, local_min.s1);
- local_max.s0 = max(local_max.s0, local_max.s1);
-
- // Update global min/max
-#ifdef IS_DATA_TYPE_FLOAT
- atomic_min(&min_max[0], FloatFlip(local_min.s0));
- atomic_max(&min_max[1], FloatFlip(local_max.s0));
-#else /* IS_DATA_TYPE_FLOAT */
- atomic_min(&min_max[0], local_min.s0);
- atomic_max(&min_max[1], local_max.s0);
-#endif /* IS_DATA_TYPE_FLOAT */
-}
-
-/** This function counts the min and max occurrences in an image and tags their position.
- *
- * @note -DCOUNT_MIN_MAX should be specified if we want to count the occurrences of the minimum and maximum values.
- * @note -DLOCATE_MIN and/or -DLOCATE_MAX should be specified if we want to store the position of each occurrence on the given array.
- *
- * @param[in] src_ptr Pointer to the source image. Supported data types: U8
- * @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[in] min_max Pointer to buffer with minimum value in position 0 and maximum value in position 1
- * @param[out] min_max_count Pointer to buffer with minimum value occurrences in position 0 and maximum value occurrences in position 1
- * @param[out] min_loc Array that holds the location of the minimum value occurrences
- * @param[in] max_min_loc_count The maximum number of min value occurrences coordinates the array can hold
- * @param[out] max_loc Array that holds the location of the maximum value occurrences
- * @param[in] max_max_loc_count The maximum number of max value occurrences coordinates the array can hold
- */
-__kernel void minmaxloc(
- IMAGE_DECLARATION(src),
- __global int *min_max,
- __global uint *min_max_count
-#ifdef LOCATE_MIN
- ,
- __global Coordinates2D *min_loc, uint max_min_loc_count
-#endif /* LOCATE_MIN */
-#ifdef LOCATE_MAX
- ,
- __global Coordinates2D *max_loc, uint max_max_loc_count
-#endif /* LOCATE_MAX */
-)
-{
- Image src = CONVERT_TO_IMAGE_STRUCT(src);
-
-#ifdef IS_DATA_TYPE_FLOAT
- __global float *min_max_ptr = (__global float *)min_max;
- float min_value = min_max_ptr[0];
- float max_value = min_max_ptr[1];
-#else /* IS_DATA_TYPE_FLOAT */
- int min_value = min_max[0];
- int max_value = min_max[1];
-#endif /* IS_DATA_TYPE_FLOAT */
-
- DATA_TYPE value = *((__global DATA_TYPE *)src.ptr);
-#ifdef COUNT_MIN_MAX
- if(value == min_value)
- {
- uint idx = atomic_inc(&min_max_count[0]);
-#ifdef LOCATE_MIN
- if(idx < max_min_loc_count)
- {
- min_loc[idx].x = get_global_id(0);
- min_loc[idx].y = get_global_id(1);
- }
-#endif /* LOCATE_MIN */
- }
- if(value == max_value)
- {
- uint idx = atomic_inc(&min_max_count[1]);
-#ifdef LOCATE_MAX
- if(idx < max_max_loc_count)
- {
- max_loc[idx].x = get_global_id(0);
- max_loc[idx].y = get_global_id(1);
- }
-#endif /* LOCATE_MAX */
- }
-#endif /* COUNT_MIN_MAX */
-}
diff --git a/src/core/CL/cl_kernels/non_linear_filter3x3.cl b/src/core/CL/cl_kernels/non_linear_filter3x3.cl
deleted file mode 100644
index 93c5024c52..0000000000
--- a/src/core/CL/cl_kernels/non_linear_filter3x3.cl
+++ /dev/null
@@ -1,186 +0,0 @@
-/*
- * Copyright (c) 2017 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "helpers.h"
-#include "non_linear_filter_helpers.h"
-
-/** This function applies a non linear filter on a 3x3 box basis on an input image.
- *
- * @note The needed filter operation is defined through the preprocessor by passing either -DMIN, -DMAX or -DMEDIAN.
- *
- * @param[in] src_ptr Pointer to the source image. Supported data types: U8
- * @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] dst_ptr Pointer to the destination image. Supported data types: U8
- * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void non_linear_filter_box3x3(
- IMAGE_DECLARATION(src),
- IMAGE_DECLARATION(dst))
-{
- Image src = CONVERT_TO_IMAGE_STRUCT(src);
- Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
-
- // Load values
- uchar16 top = vload16(0, offset(&src, -1, -1));
- uchar16 middle = vload16(0, offset(&src, -1, 0));
- uchar16 bottom = vload16(0, offset(&src, -1, 1));
-
- // Apply respective filter
-#ifdef MIN
- uchar16 tmp = min(top, min(middle, bottom));
- uchar8 out = row_reduce_min_3(tmp);
-#elif defined(MAX)
- uchar16 tmp = max(top, max(middle, bottom));
- uchar8 out = row_reduce_max_3(tmp);
-#elif defined(MEDIAN)
- uchar8 p0 = top.s01234567;
- uchar8 p1 = top.s12345678;
- uchar8 p2 = top.s23456789;
- uchar8 p3 = middle.s01234567;
- uchar8 p4 = middle.s12345678;
- uchar8 p5 = middle.s23456789;
- uchar8 p6 = bottom.s01234567;
- uchar8 p7 = bottom.s12345678;
- uchar8 p8 = bottom.s23456789;
- uchar8 out = sort9(p0, p1, p2, p3, p4, p5, p6, p7, p8);
-#else /* MIN or MAX or MEDIAN */
-#error "Unsupported filter function"
-#endif /* MIN or MAX or MEDIAN */
-
- // Store result
- vstore8(out, 0, dst.ptr);
-}
-
-/** This function applies a non linear filter on a 3x3 cross basis on an input image.
- *
- * @note The needed filter operation is defined through the preprocessor by passing either -DMIN, -DMAX or -DMEDIAN.
- *
- * @param[in] src_ptr Pointer to the source image. Supported data types: U8
- * @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] dst_ptr Pointer to the destination image. Supported data types: U8
- * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void non_linear_filter_cross3x3(
- IMAGE_DECLARATION(src),
- IMAGE_DECLARATION(dst))
-{
- Image src = CONVERT_TO_IMAGE_STRUCT(src);
- Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
-
- // Load values
- uchar8 top = vload8(0, offset(&src, 0, -1));
- uchar16 middle = vload16(0, offset(&src, -1, 0));
- uchar8 bottom = vload8(0, offset(&src, 0, 1));
-
- // Apply respective filter
-#ifdef MIN
- uchar8 tmp_middle = row_reduce_min_3(middle);
- uchar8 out = min(tmp_middle, min(top, bottom));
-#elif defined(MAX)
- uchar8 tmp_middle = row_reduce_max_3(middle);
- uchar8 out = max(tmp_middle, max(top, bottom));
-#elif defined(MEDIAN)
- uchar8 p0 = top.s01234567;
- uchar8 p1 = middle.s01234567;
- uchar8 p2 = middle.s12345678;
- uchar8 p3 = middle.s23456789;
- uchar8 p4 = bottom.s01234567;
- uchar8 out = sort5(p0, p1, p2, p3, p4);
-#else /* MIN or MAX or MEDIAN */
-#error "Unsupported filter function"
-#endif /* MIN or MAX or MEDIAN */
-
- // Store result
- vstore8(out, 0, dst.ptr);
-}
-
-/** This function applies a non linear filter on a 3x3 disk basis on an input image.
- *
- * @note The needed filter operation is defined through the preprocessor by passing either -DMIN, -DMAX or -DMEDIAN.
- *
- * @param[in] src_ptr Pointer to the source image. Supported data types: U8
- * @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] dst_ptr Pointer to the destination image. Supported data types: U8
- * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void non_linear_filter_disk3x3(
- IMAGE_DECLARATION(src),
- IMAGE_DECLARATION(dst))
-{
- Image src = CONVERT_TO_IMAGE_STRUCT(src);
- Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
-
- // Load values
- uchar16 top = vload16(0, offset(&src, -1, -1));
- uchar16 middle = vload16(0, offset(&src, -1, 0));
- uchar16 bottom = vload16(0, offset(&src, -1, 1));
-
- // Apply respective filter
-#ifdef MIN
- uchar16 tmp = min(top, min(middle, bottom));
- uchar8 out = row_reduce_min_3(tmp);
-#elif defined(MAX)
- uchar16 tmp = max(top, max(middle, bottom));
- uchar8 out = row_reduce_max_3(tmp);
-#elif defined(MEDIAN)
- uchar8 p0 = top.s01234567;
- uchar8 p1 = top.s12345678;
- uchar8 p2 = top.s23456789;
- uchar8 p3 = middle.s01234567;
- uchar8 p4 = middle.s12345678;
- uchar8 p5 = middle.s23456789;
- uchar8 p6 = bottom.s01234567;
- uchar8 p7 = bottom.s12345678;
- uchar8 p8 = bottom.s23456789;
- uchar8 out = sort9(p0, p1, p2, p3, p4, p5, p6, p7, p8);
-#else /* MIN or MAX or MEDIAN */
-#error "Unsupported filter function"
-#endif /* MIN or MAX or MEDIAN */
-
- // Store result
- vstore8(out, 0, dst.ptr);
-}
diff --git a/src/core/CL/cl_kernels/non_linear_filter5x5.cl b/src/core/CL/cl_kernels/non_linear_filter5x5.cl
deleted file mode 100644
index 7c87284a72..0000000000
--- a/src/core/CL/cl_kernels/non_linear_filter5x5.cl
+++ /dev/null
@@ -1,483 +0,0 @@
-/*
- * Copyright (c) 2016, 2017 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "helpers.h"
-#include "non_linear_filter_helpers.h"
-
-// Sorting networks below were generated using http://pages.ripco.net/~jgamble/nw.html
-
-/** Sorting network to sort 8 disks of diameter 5 and return their median.
- *
- * @param[in] top2 Values of elements two rows above.
- * @param[in] top Values of elements one row above.
- * @param[in] middle Values of middle elements.
- * @param[in] bottom Values of elements one row below.
- * @param[in] bottom2 Values of elements two rows below.
- *
- * @return Median values for 8 elements.
- */
-inline uchar8 median_disk5x5(uchar16 top2, uchar16 top, uchar16 middle, uchar16 bottom, uchar16 bottom2)
-{
- uchar8 p0 = top2.s01234567;
- uchar8 p1 = top2.s12345678;
- uchar8 p2 = top2.s23456789;
- uchar8 p3 = top.s01234567;
- uchar8 p4 = top.s12345678;
- uchar8 p5 = top.s23456789;
- uchar8 p6 = top.s3456789A;
- uchar8 p7 = top.s456789AB;
- uchar8 p8 = middle.s01234567;
- uchar8 p9 = middle.s12345678;
- uchar8 p10 = middle.s23456789;
- uchar8 p11 = middle.s3456789A;
- uchar8 p12 = middle.s456789AB;
- uchar8 p13 = bottom.s01234567;
- uchar8 p14 = bottom.s12345678;
- uchar8 p15 = bottom.s23456789;
- uchar8 p16 = bottom.s3456789A;
- uchar8 p17 = bottom.s456789AB;
- uchar8 p18 = bottom2.s01234567;
- uchar8 p19 = bottom2.s12345678;
- uchar8 p20 = bottom2.s23456789;
-
- SORT(p0, p1);
- SORT(p2, p3);
- SORT(p4, p5);
- SORT(p6, p7);
- SORT(p8, p9);
- SORT(p10, p11);
- SORT(p12, p13);
- SORT(p14, p15);
- SORT(p16, p17);
- SORT(p18, p19);
- SORT(p0, p2);
- SORT(p1, p3);
- SORT(p4, p6);
- SORT(p5, p7);
- SORT(p8, p10);
- SORT(p9, p11);
- SORT(p12, p14);
- SORT(p13, p15);
- SORT(p16, p18);
- SORT(p17, p19);
- SORT(p1, p2);
- SORT(p5, p6);
- SORT(p0, p4);
- SORT(p3, p7);
- SORT(p9, p10);
- SORT(p13, p14);
- SORT(p8, p12);
- SORT(p11, p15);
- SORT(p17, p18);
- SORT(p16, p20);
- SORT(p1, p5);
- SORT(p2, p6);
- SORT(p9, p13);
- SORT(p10, p14);
- SORT(p0, p8);
- SORT(p7, p15);
- SORT(p17, p20);
- SORT(p1, p4);
- SORT(p3, p6);
- SORT(p9, p12);
- SORT(p11, p14);
- SORT(p18, p20);
- SORT(p0, p16);
- SORT(p2, p4);
- SORT(p3, p5);
- SORT(p10, p12);
- SORT(p11, p13);
- SORT(p1, p9);
- SORT(p6, p14);
- SORT(p19, p20);
- SORT(p3, p4);
- SORT(p11, p12);
- SORT(p1, p8);
- SORT(p2, p10);
- SORT(p5, p13);
- SORT(p7, p14);
- SORT(p3, p11);
- SORT(p2, p8);
- SORT(p4, p12);
- SORT(p7, p13);
- SORT(p1, p17);
- SORT(p3, p10);
- SORT(p5, p12);
- SORT(p1, p16);
- SORT(p2, p18);
- SORT(p3, p9);
- SORT(p6, p12);
- SORT(p2, p16);
- SORT(p3, p8);
- SORT(p7, p12);
- SORT(p5, p9);
- SORT(p6, p10);
- SORT(p4, p8);
- SORT(p7, p11);
- SORT(p3, p19);
- SORT(p5, p8);
- SORT(p7, p10);
- SORT(p3, p18);
- SORT(p4, p20);
- SORT(p6, p8);
- SORT(p7, p9);
- SORT(p3, p17);
- SORT(p5, p20);
- SORT(p7, p8);
- SORT(p3, p16);
- SORT(p6, p20);
- SORT(p5, p17);
- SORT(p7, p20);
- SORT(p4, p16);
- SORT(p6, p18);
- SORT(p5, p16);
- SORT(p7, p19);
- SORT(p7, p18);
- SORT(p6, p16);
- SORT(p7, p17);
- SORT(p10, p18);
- SORT(p7, p16);
- SORT(p9, p17);
- SORT(p8, p16);
- SORT(p9, p16);
- SORT(p10, p16);
-
- return p10;
-}
-
-/** Sorting network to sort 8 boxes of size 5 and return their median.
- *
- * @param[in] top2 Values of elements two rows above.
- * @param[in] top Values of elements one row above.
- * @param[in] middle Values of middle elements.
- * @param[in] bottom Values of elements one row below.
- * @param[in] bottom2 Values of elements two rows below.
- *
- * @return Median values for 8 elements.
- */
-inline uchar8 median_box5x5(uchar16 top2, uchar16 top, uchar16 middle, uchar16 bottom, uchar16 bottom2)
-{
- uchar8 p0 = top2.s01234567;
- uchar8 p1 = top2.s12345678;
- uchar8 p2 = top2.s23456789;
- uchar8 p3 = top2.s3456789A;
- uchar8 p4 = top2.s456789AB;
- uchar8 p5 = top.s01234567;
- uchar8 p6 = top.s12345678;
- uchar8 p7 = top.s23456789;
- uchar8 p8 = top.s3456789A;
- uchar8 p9 = top.s456789AB;
- uchar8 p10 = middle.s01234567;
- uchar8 p11 = middle.s12345678;
- uchar8 p12 = middle.s23456789;
- uchar8 p13 = middle.s3456789A;
- uchar8 p14 = middle.s456789AB;
- uchar8 p15 = bottom.s01234567;
- uchar8 p16 = bottom.s12345678;
- uchar8 p17 = bottom.s23456789;
- uchar8 p18 = bottom.s3456789A;
- uchar8 p19 = bottom.s456789AB;
- uchar8 p20 = bottom2.s01234567;
- uchar8 p21 = bottom2.s12345678;
- uchar8 p22 = bottom2.s23456789;
- uchar8 p23 = bottom2.s3456789A;
- uchar8 p24 = bottom2.s456789AB;
-
- SORT(p1, p2);
- SORT(p0, p1);
- SORT(p1, p2);
- SORT(p4, p5);
- SORT(p3, p4);
- SORT(p4, p5);
- SORT(p0, p3);
- SORT(p2, p5);
- SORT(p2, p3);
- SORT(p1, p4);
- SORT(p1, p2);
- SORT(p3, p4);
- SORT(p7, p8);
- SORT(p6, p7);
- SORT(p7, p8);
- SORT(p10, p11);
- SORT(p9, p10);
- SORT(p10, p11);
- SORT(p6, p9);
- SORT(p8, p11);
- SORT(p8, p9);
- SORT(p7, p10);
- SORT(p7, p8);
- SORT(p9, p10);
- SORT(p0, p6);
- SORT(p4, p10);
- SORT(p4, p6);
- SORT(p2, p8);
- SORT(p2, p4);
- SORT(p6, p8);
- SORT(p1, p7);
- SORT(p5, p11);
- SORT(p5, p7);
- SORT(p3, p9);
- SORT(p3, p5);
- SORT(p7, p9);
- SORT(p1, p2);
- SORT(p3, p4);
- SORT(p5, p6);
- SORT(p7, p8);
- SORT(p9, p10);
- SORT(p13, p14);
- SORT(p12, p13);
- SORT(p13, p14);
- SORT(p16, p17);
- SORT(p15, p16);
- SORT(p16, p17);
- SORT(p12, p15);
- SORT(p14, p17);
- SORT(p14, p15);
- SORT(p13, p16);
- SORT(p13, p14);
- SORT(p15, p16);
- SORT(p19, p20);
- SORT(p18, p19);
- SORT(p19, p20);
- SORT(p21, p22);
- SORT(p23, p24);
- SORT(p21, p23);
- SORT(p22, p24);
- SORT(p22, p23);
- SORT(p18, p21);
- SORT(p20, p23);
- SORT(p20, p21);
- SORT(p19, p22);
- SORT(p22, p24);
- SORT(p19, p20);
- SORT(p21, p22);
- SORT(p23, p24);
- SORT(p12, p18);
- SORT(p16, p22);
- SORT(p16, p18);
- SORT(p14, p20);
- SORT(p20, p24);
- SORT(p14, p16);
- SORT(p18, p20);
- SORT(p22, p24);
- SORT(p13, p19);
- SORT(p17, p23);
- SORT(p17, p19);
- SORT(p15, p21);
- SORT(p15, p17);
- SORT(p19, p21);
- SORT(p13, p14);
- SORT(p15, p16);
- SORT(p17, p18);
- SORT(p19, p20);
- SORT(p21, p22);
- SORT(p23, p24);
- SORT(p0, p12);
- SORT(p8, p20);
- SORT(p8, p12);
- SORT(p4, p16);
- SORT(p16, p24);
- SORT(p12, p16);
- SORT(p2, p14);
- SORT(p10, p22);
- SORT(p10, p14);
- SORT(p6, p18);
- SORT(p6, p10);
- SORT(p10, p12);
- SORT(p1, p13);
- SORT(p9, p21);
- SORT(p9, p13);
- SORT(p5, p17);
- SORT(p13, p17);
- SORT(p3, p15);
- SORT(p11, p23);
- SORT(p11, p15);
- SORT(p7, p19);
- SORT(p7, p11);
- SORT(p11, p13);
- SORT(p11, p12);
- return p12;
-}
-
-/** This function applies a non linear filter on a 5x5 box basis on an input image.
- *
- * @note The needed filter operation is defined through the preprocessor by passing either -DMIN, -DMAX or -DMEDIAN.
- *
- * @param[in] src_ptr Pointer to the source image. Supported data types: U8
- * @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] dst_ptr Pointer to the destination image. Supported data types: U8
- * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void non_linear_filter_box5x5(
- IMAGE_DECLARATION(src),
- IMAGE_DECLARATION(dst))
-{
- Image src = CONVERT_TO_IMAGE_STRUCT(src);
- Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
-
- // Load values
- uchar16 top2 = vload16(0, offset(&src, -2, -2));
- uchar16 top = vload16(0, offset(&src, -2, -1));
- uchar16 middle = vload16(0, offset(&src, -2, 0));
- uchar16 bottom = vload16(0, offset(&src, -2, 1));
- uchar16 bottom2 = vload16(0, offset(&src, -2, 2));
-
- // Apply respective filter
-#ifdef MIN
- uchar16 tmp = min(middle, min(min(top2, top), min(bottom, bottom2)));
- uchar8 out = row_reduce_min_5(tmp);
-#elif defined(MAX)
- uchar16 tmp = max(middle, max(max(top2, top), max(bottom, bottom2)));
- uchar8 out = row_reduce_max_5(tmp);
-#elif defined(MEDIAN)
- uchar8 out = median_box5x5(top2, top, middle, bottom, bottom2);
-#else /* MIN or MAX or MEDIAN */
-#error "Unsupported filter function"
-#endif /* MIN or MAX or MEDIAN */
-
- // Store result
- vstore8(out, 0, dst.ptr);
-}
-
-/** This function applies a non linear filter on a 5x5 cross basis on an input image.
- *
- * @note The needed filter operation is defined through the preprocessor by passing either -DMIN, -DMAX or -DMEDIAN.
- *
- * @param[in] src_ptr Pointer to the source image. Supported data types: U8
- * @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] dst_ptr Pointer to the destination image. Supported data types: U8
- * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void non_linear_filter_cross5x5(
- IMAGE_DECLARATION(src),
- IMAGE_DECLARATION(dst))
-{
- Image src = CONVERT_TO_IMAGE_STRUCT(src);
- Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
-
- // Load values
- uchar8 top2 = vload8(0, offset(&src, 0, -2));
- uchar8 top = vload8(0, offset(&src, 0, -1));
- uchar16 middle = vload16(0, offset(&src, -2, 0));
- uchar8 bottom = vload8(0, offset(&src, 0, 1));
- uchar8 bottom2 = vload8(0, offset(&src, 0, 2));
-
- // Apply respective filter
-#ifdef MIN
- uchar8 tmp_middle = row_reduce_min_5(middle);
- uchar8 out = min(tmp_middle, min(min(top2, top), min(bottom, bottom2)));
-#elif defined(MAX)
- uchar8 tmp_middle = row_reduce_max_5(middle);
- uchar8 out = max(tmp_middle, max(max(top2, top.s01234567), max(bottom, bottom2)));
-#elif defined(MEDIAN)
- uchar8 p0 = top2;
- uchar8 p1 = top;
- uchar8 p2 = middle.s01234567;
- uchar8 p3 = middle.s12345678;
- uchar8 p4 = middle.s23456789;
- uchar8 p5 = middle.s3456789A;
- uchar8 p6 = middle.s456789AB;
- uchar8 p7 = bottom;
- uchar8 p8 = bottom2;
- uchar8 out = sort9(p0, p1, p2, p3, p4, p5, p6, p7, p8);
-#else /* MIN or MAX or MEDIAN */
-#error "Unsupported filter function"
-#endif /* MIN or MAX or MEDIAN */
-
- // Store result
- vstore8(out, 0, dst.ptr);
-}
-
-/** This function applies a non linear filter on a 5x5 disk basis on an input image.
- *
- * @note The needed filter operation is defined through the preprocessor by passing either -DMIN, -DMAX or -DMEDIAN.
- *
- * @param[in] src_ptr Pointer to the source image. Supported data types: U8
- * @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] dst_ptr Pointer to the destination image. Supported data types: U8
- * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void non_linear_filter_disk5x5(
- IMAGE_DECLARATION(src),
- IMAGE_DECLARATION(dst))
-{
- Image src = CONVERT_TO_IMAGE_STRUCT(src);
- Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
-
- // Load values
- uchar16 top2 = vload16(0, offset(&src, -2, -2));
- uchar16 top = vload16(0, offset(&src, -2, -1));
- uchar16 middle = vload16(0, offset(&src, -2, 0));
- uchar16 bottom = vload16(0, offset(&src, -2, 1));
- uchar16 bottom2 = vload16(0, offset(&src, -2, 2));
-
- // Shift top2 and bottom2 values
- top2 = top2.s123456789ABCDEFF;
- bottom2 = bottom2.s123456789ABCDEFF;
-
- // Apply respective filter
-#ifdef MIN
- uchar16 tmp_3 = min(top2, bottom2);
- uchar16 tmp_5 = min(middle, min(top, bottom));
- uchar8 tmp_3_red = row_reduce_min_3(tmp_3);
- uchar8 tmp_5_red = row_reduce_min_5(tmp_5);
- uchar8 out = min(tmp_3_red, tmp_5_red);
-#elif defined(MAX)
- uchar16 tmp_3 = max(top2, bottom2);
- uchar16 tmp_5 = max(middle, max(top, bottom));
- uchar8 tmp_3_red = row_reduce_max_3(tmp_3);
- uchar8 tmp_5_red = row_reduce_max_5(tmp_5);
- uchar8 out = max(tmp_3_red, tmp_5_red);
-#elif defined(MEDIAN)
- uchar8 out = median_disk5x5(top2, top, middle, bottom, bottom2);
-#else /* MIN or MAX or MEDIAN */
-#error "Unsupported filter function"
-#endif /* MIN or MAX or MEDIAN */
-
- // Store result
- vstore8(out, 0, dst.ptr);
-}
diff --git a/src/core/CL/cl_kernels/non_linear_filter_helpers.h b/src/core/CL/cl_kernels/non_linear_filter_helpers.h
deleted file mode 100644
index 3fcfad46f5..0000000000
--- a/src/core/CL/cl_kernels/non_linear_filter_helpers.h
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * Copyright (c) 2017 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/** Sorts element-wise two vectors.
- *
- * @param[in, out] a First vector
- * @param[in, out] b Second vector
- */
-#define SORT(a, b) \
- { \
- uchar8 min_val = min(a, b); \
- uchar8 max_val = max(a, b); \
- a = min_val; \
- b = max_val; \
- }
-
-// Sorting networks below were generated using http://pages.ripco.net/~jgamble/nw.html
-
-/** Sorting network to sort 5 vectors of 8 elements and return their median.
- *
- * @param[in] p0 First element vector
- * @param[in] p1 Second element vector
- * @param[in] p2 Third element vector
- * @param[in] p3 Fourth element vector
- * @param[in] p4 Fifth element vector
- *
- * @return Median values for 8 elements.
- */
-inline uchar8 sort5(uchar8 p0, uchar8 p1, uchar8 p2, uchar8 p3, uchar8 p4)
-{
- SORT(p0, p1);
- SORT(p2, p3);
- SORT(p0, p2);
- SORT(p1, p3);
- SORT(p1, p2);
- SORT(p0, p4);
- SORT(p1, p4);
- SORT(p2, p4);
-
- return p2;
-}
-
-/** Sorting network to sort 9 vectors of 8 elements and return their median.
- *
- * @param[in] p0 First element vector
- * @param[in] p1 Second element vector
- * @param[in] p2 Third element vector
- * @param[in] p3 Fourth element vector
- * @param[in] p4 Fifth element vector
- * @param[in] p5 Sixth element vector
- * @param[in] p6 Seventh element vector
- * @param[in] p7 Eigth element vector
- * @param[in] p8 Ninth element vector
- *
- * @return Median values for 8 elements.
- */
-inline uchar8 sort9(uchar8 p0, uchar8 p1, uchar8 p2, uchar8 p3, uchar8 p4, uchar8 p5, uchar8 p6, uchar8 p7, uchar8 p8)
-{
- SORT(p1, p2);
- SORT(p4, p5);
- SORT(p7, p8);
- SORT(p0, p1);
- SORT(p3, p4);
- SORT(p6, p7);
- SORT(p1, p2);
- SORT(p4, p5);
- SORT(p7, p8);
- SORT(p0, p3);
- SORT(p5, p8);
- SORT(p4, p7);
- SORT(p3, p6);
- SORT(p1, p4);
- SORT(p2, p5);
- SORT(p4, p7);
- SORT(p4, p2);
- SORT(p6, p4);
- SORT(p4, p2);
-
- return p4;
-}
-
-/** Calculate the minimum of a sliding window of size 3.
- *
- * @param val Values to calculate the minimum values
- *
- * @return Minimum values of 8 elements on a sliding window of size 3.
- */
-inline uchar8 row_reduce_min_3(uchar16 val)
-{
- return min(val.s01234567, min(val.s12345678, val.s23456789));
-}
-
-/** Calculate the maximum of a sliding window of size 3.
- *
- * @param val Values to calculate the maximum values
- *
- * @return Maximum values of 8 elements on a sliding window of size 3.
- */
-inline uchar8 row_reduce_max_3(uchar16 val)
-{
- return max(val.s01234567, max(val.s12345678, val.s23456789));
-}
-
-/** Calculate the minimum of a sliding window of size 5.
- *
- * @param val Values to calculate the minimum values
- *
- * @return Minimum values of 8 elements on a sliding window of size 5.
- */
-inline uchar8 row_reduce_min_5(uchar16 val)
-{
- return min(val.s01234567, min(min(val.s12345678, val.s23456789), min(val.s3456789A, val.s456789AB)));
-}
-
-/** Calculate the maximum of a sliding window of size 5.
- *
- * @param val Values to calculate the maximum values
- *
- * @return Maximum values of 8 elements on a sliding window of size 5.
- */
-inline uchar8 row_reduce_max_5(uchar16 val)
-{
- return max(val.s01234567, max(max(val.s12345678, val.s23456789), max(val.s3456789A, val.s456789AB)));
-}
diff --git a/src/core/CL/cl_kernels/optical_flow_pyramid_lk.cl b/src/core/CL/cl_kernels/optical_flow_pyramid_lk.cl
deleted file mode 100644
index 9bbde1a57f..0000000000
--- a/src/core/CL/cl_kernels/optical_flow_pyramid_lk.cl
+++ /dev/null
@@ -1,521 +0,0 @@
-/*
- * Copyright (c) 2017-2018 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "helpers.h"
-#include "types.h"
-
-/*
- *The criteria for lost tracking is that the spatial gradient matrix has:
- * - Determinant less than DETERMINANT_THR
- * - or minimum eigenvalue is smaller then EIGENVALUE_THR
- *
- * The thresholds for the determinant and the minimum eigenvalue is
- * defined by the OpenVX spec
- *
- * Note: Also lost tracking happens when the point tracked coordinate is outside
- * the image coordinates
- *
- * https://www.khronos.org/registry/vx/specs/1.0/html/d0/d0c/group__group__vision__function__opticalflowpyrlk.html
- */
-
-/* Internal Lucas-Kanade Keypoint struct */
-typedef struct InternalKeypoint
-{
- float x; /**< The x coordinate. */
- float y; /**< The y coordinate. */
- float tracking_status; /**< A zero indicates a lost point. Initialized to 1 by corner detectors. */
- float dummy; /**< Dummy member for alignment. */
-} InternalKeypoint;
-
-/** Threshold for the determinant. Used for lost tracking criteria */
-#define DETERMINANT_THR 1.0e-07f
-
-/** Thresholds for minimum eigenvalue. Used for lost tracking criteria */
-#define EIGENVALUE_THR 1.0e-04f
-
-/** Constants used for Lucas-Kanade Algorithm */
-#define W_BITS (14)
-#define FLT_SCALE (1.0f / (float)(1 << 20))
-#define D0 ((float)(1 << W_BITS))
-#define D1 (1.0f / (float)(1 << (W_BITS - 5)))
-
-/** Initializes the internal new points array when the level of pyramid is NOT equal to max.
- *
- * @param[in,out] old_points_internal An array of internal key points that are defined at the old_images high resolution pyramid.
- * @param[in,out] new_points_internal An array of internal key points that are defined at the new_images high resolution pyramid.
- * @param[in] scale Scale factor to apply for the new_point coordinates.
- */
-__kernel void init_level(
- __global float4 *old_points_internal,
- __global float4 *new_points_internal,
- const float scale)
-{
- int idx = get_global_id(0);
-
- // Get old and new keypoints
- float4 old_point = old_points_internal[idx];
- float4 new_point = new_points_internal[idx];
-
- // Scale accordingly with the pyramid_scale
- old_point.xy *= (float2)(2.0f);
- new_point.xy *= (float2)(2.0f);
-
- old_points_internal[idx] = old_point;
- new_points_internal[idx] = new_point;
-}
-
-/** Initializes the internal new points array when the level of pyramid is equal to max.
- *
- * @param[in] old_points An array of key points that are defined at the old_images high resolution pyramid.
- * @param[in,out] old_points_internal An array of internal key points that are defined at the old_images high resolution pyramid.
- * @param[out] new_points_internal An array of internal key points that are defined at the new_images high resolution pyramid.
- * @param[in] scale Scale factor to apply for the new_point coordinates.
- */
-__kernel void init_level_max(
- __global Keypoint *old_points,
- __global InternalKeypoint *old_points_internal,
- __global InternalKeypoint *new_points_internal,
- const float scale)
-{
- int idx = get_global_id(0);
-
- Keypoint old_point = old_points[idx];
-
- // Get old keypoint to track
- InternalKeypoint old_point_internal;
- old_point_internal.x = old_point.x * scale;
- old_point_internal.y = old_point.y * scale;
- old_point_internal.tracking_status = 1.f;
-
- // Store internal keypoints
- old_points_internal[idx] = old_point_internal;
- new_points_internal[idx] = old_point_internal;
-}
-
-/** Initializes the new_points array when the level of pyramid is equal to max and if use_initial_estimate = 1.
- *
- * @param[in] old_points An array of key points that are defined at the old_images high resolution pyramid.
- * @param[in] new_points_estimates An array of estimate key points that are defined at the old_images high resolution pyramid.
- * @param[in,out] old_points_internal An array of internal key points that are defined at the old_images high resolution pyramid.
- * @param[out] new_points_internal An array of internal key points that are defined at the new_images high resolution pyramid.
- * @param[in] scale Scale factor to apply for the new_point coordinates.
- */
-__kernel void init_level_max_initial_estimate(
- __global Keypoint *old_points,
- __global Keypoint *new_points_estimates,
- __global InternalKeypoint *old_points_internal,
- __global InternalKeypoint *new_points_internal,
- const float scale)
-{
- int idx = get_global_id(0);
-
- Keypoint old_point = old_points[idx];
- Keypoint new_point_estimate = new_points_estimates[idx];
- InternalKeypoint old_point_internal;
- InternalKeypoint new_point_internal;
-
- // Get old keypoint to track
- old_point_internal.x = old_point.x * scale;
- old_point_internal.y = old_point.y * scale;
- old_point_internal.tracking_status = 1.f;
-
- // Get new keypoint to track
- new_point_internal.x = new_point_estimate.x * scale;
- new_point_internal.y = new_point_estimate.y * scale;
- new_point_internal.tracking_status = new_point_estimate.tracking_status;
-
- // Store internal keypoints
- old_points_internal[idx] = old_point_internal;
- new_points_internal[idx] = new_point_internal;
-}
-
-/** Truncates the coordinates stored in new_points array
- *
- * @param[in] new_points_internal An array of estimate key points that are defined at the new_images high resolution pyramid.
- * @param[out] new_points An array of internal key points that are defined at the new_images high resolution pyramid.
- */
-__kernel void finalize(
- __global InternalKeypoint *new_points_internal,
- __global Keypoint *new_points)
-{
- int idx = get_global_id(0);
-
- // Load internal keypoint
- InternalKeypoint new_point_internal = new_points_internal[idx];
-
- // Calculate output point
- Keypoint new_point;
- new_point.x = round(new_point_internal.x);
- new_point.y = round(new_point_internal.y);
- new_point.strength = 0.f;
- new_point.scale = 0.f;
- new_point.orientation = 0.f;
- new_point.tracking_status = new_point_internal.tracking_status;
- new_point.error = 0.f;
-
- // Store new point
- new_points[idx] = new_point;
-}
-
-/** Computes A11, A12, A22, min_eig, ival, ixval and iyval at level 0th of the pyramid. These values will be used in step 1.
- *
- * @param[in] old_image_ptr Pointer to the input old image. Supported data types: U8
- * @param[in] old_image_stride_x Stride of the input old image in X dimension (in bytes)
- * @param[in] old_image_step_x old_image_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] old_image_stride_y Stride of the input old image in Y dimension (in bytes)
- * @param[in] old_image_step_y old_image_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] old_image_offset_first_element_in_bytes The offset of the first element in the input old image
- * @param[in] old_scharr_gx_ptr Pointer to the input scharr x image. Supported data types: S16
- * @param[in] old_scharr_gx_stride_x Stride of the input scharr x image in X dimension (in bytes)
- * @param[in] old_scharr_gx_step_x old_scharr_gx_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] old_scharr_gx_stride_y Stride of the input scharr x image in Y dimension (in bytes)
- * @param[in] old_scharr_gx_step_y old_scharr_gx_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] old_scharr_gx_offset_first_element_in_bytes The offset of the first element in the input scharr x image
- * @param[in] old_scharr_gy_ptr Pointer to the input scharr y image. Supported data types: S16
- * @param[in] old_scharr_gy_stride_x Stride of the input scharr y image in X dimension (in bytes)
- * @param[in] old_scharr_gy_step_x old_scharr_gy_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] old_scharr_gy_stride_y Stride of the input scharr y image in Y dimension (in bytes)
- * @param[in] old_scharr_gy_step_y old_scharr_gy_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] old_scharr_gy_offset_first_element_in_bytes The offset of the first element in the input scharr y image
- * @param[in] old_points An array of key points. Those key points are defined at the old_images high resolution pyramid
- * @param[in, out] new_points An output array of key points. Those key points are defined at the new_images high resolution pyramid
- * @param[out] coeff It stores | A11 | A12 | A22 | min_eig | for each keypoint
- * @param[out] iold_val It stores | ival | ixval | iyval | dummy | for each point in the window centered on old_keypoint
- * @param[in] window_dimension The size of the window on which to perform the algorithm
- * @param[in] window_dimension_pow2 The squared size of the window on which to perform the algorithm
- * @param[in] half_window The half size of the window on which to perform the algorithm
- * @param[in] border_limits It stores the right border limit (width - window_dimension - 1, height - window_dimension - 1,)
- * @param[in] eig_const 1.0f / (float)(2.0f * window_dimension * window_dimension)
- * @param[in] level0 It is set to 1 if level 0 of the pyramid
- */
-void __kernel lktracker_stage0(
- IMAGE_DECLARATION(old_image),
- IMAGE_DECLARATION(old_scharr_gx),
- IMAGE_DECLARATION(old_scharr_gy),
- __global float4 *old_points,
- __global float4 *new_points,
- __global float4 *coeff,
- __global short4 *iold_val,
- const int window_dimension,
- const int window_dimension_pow2,
- const int half_window,
- const float3 border_limits,
- const float eig_const,
- const int level0)
-{
- int idx = get_global_id(0);
-
- Image old_image = CONVERT_TO_IMAGE_STRUCT_NO_STEP(old_image);
- Image old_scharr_gx = CONVERT_TO_IMAGE_STRUCT_NO_STEP(old_scharr_gx);
- Image old_scharr_gy = CONVERT_TO_IMAGE_STRUCT_NO_STEP(old_scharr_gy);
-
- // Get old keypoint
- float2 old_keypoint = old_points[idx].xy - (float2)half_window;
-
- // Get the floor value
- float2 iold_keypoint = floor(old_keypoint);
-
- // Check if using the window dimension we can go out of boundary in the following for loops. If so, invalidate the tracked point
- if(any(iold_keypoint < border_limits.zz) || any(iold_keypoint >= border_limits.xy))
- {
- if(level0 == 1)
- {
- // Invalidate tracked point as we are at level 0
- new_points[idx].s2 = 0.0f;
- }
-
- // Not valid coordinate. It sets min_eig to 0.0f
- coeff[idx].s3 = 0.0f;
-
- return;
- }
-
- // Compute weight for the bilinear interpolation
- float2 ab = old_keypoint - iold_keypoint;
-
- // Weight used for Bilinear-Interpolation on Scharr images
- // w_scharr.s0 = (1.0f - ab.x) * (1.0f - ab.y)
- // w_scharr.s1 = ab.x * (1.0f - ab.y)
- // w_scharr.s2 = (1.0f - ab.x) * ab.y
- // w_scharr.s3 = ab.x * ab.y
-
- float4 w_scharr;
- w_scharr.s3 = ab.x * ab.y;
- w_scharr.s0 = w_scharr.s3 + 1.0f - ab.x - ab.y;
- w_scharr.s12 = ab - (float2)w_scharr.s3;
-
- // Weight used for Bilinear-Interpolation on Old and New images
- // w.s0 = round(w_scharr.s0 * D0)
- // w.s1 = round(w_scharr.s1 * D0)
- // w.s2 = round(w_scharr.s2 * D0)
- // w.s3 = w.s3 = D0 - w.s0 - w.s1 - w.s2
-
- float4 w;
- w = round(w_scharr * (float4)D0);
- w.s3 = D0 - w.s0 - w.s1 - w.s2; // Added for matching VX implementation
-
- // G.s0 = A11, G.s1 = A12, G.s2 = A22, G.s3 = min_eig
- int4 iG = (int4)0;
-
- // Window offset
- int window_offset = idx * window_dimension_pow2;
-
- // Compute Spatial Gradient Matrix G
- for(ushort ky = 0; ky < window_dimension; ++ky)
- {
- int offset_y = iold_keypoint.y + ky;
- for(ushort kx = 0; kx < window_dimension; ++kx)
- {
- int offset_x = iold_keypoint.x + kx;
- float4 px;
-
- // Load values from old_image for computing the bilinear interpolation
- px = convert_float4((uchar4)(vload2(0, offset(&old_image, offset_x, offset_y)),
- vload2(0, offset(&old_image, offset_x, offset_y + 1))));
-
- // old_i.s0 = ival, old_i.s1 = ixval, old_i.s2 = iyval, old_i.s3 = dummy
- float4 old_i;
-
- // Compute bilinear interpolation (with D1 scale factor) for ival
- old_i.s0 = dot(px, w) * D1;
-
- // Load values from old_scharr_gx for computing the bilinear interpolation
- px = convert_float4((short4)(vload2(0, (__global short *)offset(&old_scharr_gx, offset_x, offset_y)),
- vload2(0, (__global short *)offset(&old_scharr_gx, offset_x, offset_y + 1))));
-
- // Compute bilinear interpolation for ixval
- old_i.s1 = dot(px, w_scharr);
-
- // Load values from old_scharr_gy for computing the bilinear interpolation
- px = convert_float4((short4)(vload2(0, (__global short *)offset(&old_scharr_gy, offset_x, offset_y)),
- vload2(0, (__global short *)offset(&old_scharr_gy, offset_x, offset_y + 1))));
-
- // Compute bilinear interpolation for iyval
- old_i.s2 = dot(px, w_scharr);
-
- // Rounding (it could be omitted. Used just for matching the VX implementation)
- int4 iold = convert_int4(round(old_i));
-
- // Accumulate values in the Spatial Gradient Matrix
- iG.s0 += (int)(iold.s1 * iold.s1);
- iG.s1 += (int)(iold.s1 * iold.s2);
- iG.s2 += (int)(iold.s2 * iold.s2);
-
- // Store ival, ixval and iyval
- iold_val[window_offset + kx] = convert_short4(iold);
- }
- window_offset += window_dimension;
- }
-
- // Scale iA11, iA12 and iA22
- float4 G = convert_float4(iG) * (float4)FLT_SCALE;
-
- // Compute minimum eigen value
- G.s3 = (float)(G.s2 + G.s0 - sqrt(pown(G.s0 - G.s2, 2) + 4.0f * G.s1 * G.s1)) * eig_const;
-
- // Store A11. A11, A22 and min_eig
- coeff[idx] = G;
-}
-
-/** Computes the motion vector for a given keypoint
- *
- * @param[in] new_image_ptr Pointer to the input new image. Supported data types: U8
- * @param[in] new_image_stride_x Stride of the input new image in X dimension (in bytes)
- * @param[in] new_image_step_x new_image_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] new_image_stride_y Stride of the input new image in Y dimension (in bytes)
- * @param[in] new_image_step_y new_image_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] new_image_offset_first_element_in_bytes The offset of the first element in the input new image
- * @param[in, out] new_points An output array of key points. Those key points are defined at the new_images high resolution pyramid
- * @param[in] coeff The | A11 | A12 | A22 | min_eig | for each keypoint
- * @param[in] iold_val The | ival | ixval | iyval | dummy | for each point in the window centered on old_keypoint
- * @param[in] window_dimension The size of the window on which to perform the algorithm
- * @param[in] window_dimension_pow2 The squared size of the window on which to perform the algorithm
- * @param[in] half_window The half size of the window on which to perform the algorithm
- * @param[in] num_iterations The maximum number of iterations
- * @param[in] epsilon The value for terminating the algorithm.
- * @param[in] border_limits It stores the right border limit (width - window_dimension - 1, height - window_dimension - 1,)
- * @param[in] eig_const 1.0f / (float)(2.0f * window_dimension * window_dimension)
- * @param[in] level0 It is set to 1 if level of pyramid = 0
- * @param[in] term_epsilon It is set to 1 if termination = TERM_CRITERIA_EPSILON
- */
-void __kernel lktracker_stage1(
- IMAGE_DECLARATION(new_image),
- __global float4 *new_points,
- __global float4 *coeff,
- __global short4 *iold_val,
- const int window_dimension,
- const int window_dimension_pow2,
- const int half_window,
- const int num_iterations,
- const float epsilon,
- const float3 border_limits,
- const float eig_const,
- const int level0,
- const int term_epsilon)
-{
- int idx = get_global_id(0);
- Image new_image = CONVERT_TO_IMAGE_STRUCT_NO_STEP(new_image);
-
- // G.s0 = A11, G.s1 = A12, G.s2 = A22, G.s3 = min_eig
- float4 G = coeff[idx];
-
- // Determinant
- float D = G.s0 * G.s2 - G.s1 * G.s1;
-
- // Check if it is a good point to track
- if(G.s3 < EIGENVALUE_THR || D < DETERMINANT_THR)
- {
- if(level0 == 1)
- {
- // Invalidate tracked point as we are at level 0
- new_points[idx].s2 = 0;
- }
-
- return;
- }
-
- // Compute inverse
- //D = native_recip(D);
- D = 1.0 / D;
-
- // Get new keypoint
- float2 new_keypoint = new_points[idx].xy - (float)half_window;
-
- // Get new point
- float2 out_new_point = new_points[idx].xy;
-
- // Keep delta obtained in the previous iteration
- float2 prev_delta = (float2)0.0f;
-
- int j = 0;
- while(j < num_iterations)
- {
- // Get the floor value
- float2 inew_keypoint = floor(new_keypoint);
-
- // Check if using the window dimension we can go out of boundary in the following for loops. If so, invalidate the tracked point
- if(any(inew_keypoint < border_limits.zz) || any(inew_keypoint >= border_limits.xy))
- {
- if(level0 == 1)
- {
- // Invalidate tracked point as we are at level 0
- new_points[idx].s2 = 0.0f;
- }
- else
- {
- new_points[idx].xy = out_new_point;
- }
-
- return;
- }
-
- // Compute weight for the bilinear interpolation
- float2 ab = new_keypoint - inew_keypoint;
-
- // Weight used for Bilinear-Interpolation on Old and New images
- // w.s0 = round((1.0f - ab.x) * (1.0f - ab.y) * D0)
- // w.s1 = round(ab.x * (1.0f - ab.y) * D0)
- // w.s2 = round((1.0f - ab.x) * ab.y * D0)
- // w.s3 = D0 - w.s0 - w.s1 - w.s2
-
- float4 w;
- w.s3 = ab.x * ab.y;
- w.s0 = w.s3 + 1.0f - ab.x - ab.y;
- w.s12 = ab - (float2)w.s3;
- w = round(w * (float4)D0);
- w.s3 = D0 - w.s0 - w.s1 - w.s2;
-
- // Mismatch vector
- int2 ib = 0;
-
- // Old val offset
- int old_val_offset = idx * window_dimension_pow2;
-
- for(int ky = 0; ky < window_dimension; ++ky)
- {
- for(int kx = 0; kx < window_dimension; ++kx)
- {
- // ival, ixval and iyval have been computed in the previous stage
- int4 old_ival = convert_int4(iold_val[old_val_offset]);
-
- // Load values from old_image for computing the bilinear interpolation
- float4 px = convert_float4((uchar4)(vload2(0, offset(&new_image, inew_keypoint.x + kx, inew_keypoint.y + ky)),
- vload2(0, offset(&new_image, inew_keypoint.x + kx, inew_keypoint.y + ky + 1))));
-
- // Compute bilinear interpolation on new image
- int jval = (int)round(dot(px, w) * D1);
-
- // Compute luminance difference
- int diff = (int)(jval - old_ival.s0);
-
- // Accumulate values in mismatch vector
- ib += (diff * old_ival.s12);
-
- // Update old val offset
- old_val_offset++;
- }
- }
-
- float2 b = convert_float2(ib) * (float2)FLT_SCALE;
-
- // Optical Flow
- float2 delta;
-
- delta.x = (float)((G.s1 * b.y - G.s2 * b.x) * D);
- delta.y = (float)((G.s1 * b.x - G.s0 * b.y) * D);
-
- // Update new point coordinate
- new_keypoint += delta;
-
- out_new_point = new_keypoint + (float2)half_window;
-
- if(term_epsilon == 1)
- {
- float mag2 = dot(delta, delta);
-
- if(mag2 <= epsilon)
- {
- new_points[idx].xy = out_new_point;
-
- return;
- }
- }
-
- // Check convergence analyzing the previous delta
- if(j > 0 && all(fabs(delta + prev_delta) < (float2)0.01f))
- {
- out_new_point -= delta * (float2)0.5f;
-
- new_points[idx].xy = out_new_point;
-
- return;
- }
-
- // Update previous delta
- prev_delta = delta;
-
- j++;
- }
-
- new_points[idx].xy = out_new_point;
-}
diff --git a/src/core/CL/cl_kernels/scharr_filter.cl b/src/core/CL/cl_kernels/scharr_filter.cl
deleted file mode 100644
index d2868b6731..0000000000
--- a/src/core/CL/cl_kernels/scharr_filter.cl
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * Copyright (c) 2016, 2017 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "helpers.h"
-
-/** This OpenCL kernel computes Scharr3x3.
- *
- * @attention To enable computation of the X gradient -DGRAD_X must be passed at compile time, while computation of the Y gradient
- * is performed when -DGRAD_Y is used. You can use both when computation of both gradients is required.
- *
- * @param[in] src_ptr Pointer to the source image. Supported data types: U8
- * @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] dst_gx_ptr Pointer to the destination image Supported data types: S16
- * @param[in] dst_gx_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] dst_gx_step_x dst_gx_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_gx_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] dst_gx_step_y dst_gx_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_gx_offset_first_element_in_bytes The offset of the first element in the destination image
- * @param[out] dst_gy_ptr Pointer to the destination image. Supported data types: S16
- * @param[in] dst_gy_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] dst_gy_step_x dst_gy_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_gy_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] dst_gy_step_y dst_gy_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_gy_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void scharr3x3(
- IMAGE_DECLARATION(src)
-#ifdef GRAD_X
- ,
- IMAGE_DECLARATION(dst_gx)
-#endif /* GRAD_X */
-#ifdef GRAD_Y
- ,
- IMAGE_DECLARATION(dst_gy)
-#endif /* GRAD_Y */
-)
-{
- Image src = CONVERT_TO_IMAGE_STRUCT(src);
-#ifdef GRAD_X
- Image dst_gx = CONVERT_TO_IMAGE_STRUCT(dst_gx);
-#endif /* GRAD_X */
-#ifdef GRAD_Y
- Image dst_gy = CONVERT_TO_IMAGE_STRUCT(dst_gy);
-#endif /* GRAD_Y */
-
- // Output pixels
-#ifdef GRAD_X
- short8 gx = (short8)0;
-#endif /* GRAD_X */
-#ifdef GRAD_Y
- short8 gy = (short8)0;
-#endif /* GRAD_Y */
-
- // Row0
- uchar16 temp = vload16(0, offset(&src, -1, -1));
- short8 left = convert_short8(temp.s01234567);
- short8 middle = convert_short8(temp.s12345678);
- short8 right = convert_short8(temp.s23456789);
-#ifdef GRAD_X
- gx += left * (short8)(-3);
- gx += right * (short8)(+3);
-#endif /* GRAD_X */
-#ifdef GRAD_Y
- gy += left * (short8)(-3);
- gy += middle * (short8)(-10);
- gy += right * (short8)(-3);
-#endif /* GRAD_Y */
-
- // Row1
- temp = vload16(0, offset(&src, -1, 0));
- left = convert_short8(temp.s01234567);
- right = convert_short8(temp.s23456789);
-#ifdef GRAD_X
- gx += left * (short8)(-10);
- gx += right * (short8)(+10);
-#endif /* GRAD_X */
-
- // Row2
- temp = vload16(0, offset(&src, -1, 1));
- left = convert_short8(temp.s01234567);
- middle = convert_short8(temp.s12345678);
- right = convert_short8(temp.s23456789);
-#ifdef GRAD_X
- gx += left * (short8)(-3);
- gx += right * (short8)(+3);
-#endif /* GRAD_X */
-#ifdef GRAD_Y
- gy += left * (short8)(+3);
- gy += middle * (short8)(+10);
- gy += right * (short8)(+3);
-#endif /* GRAD_Y */
-
- // Store results
-#ifdef GRAD_X
- vstore8(gx, 0, ((__global short *)dst_gx.ptr));
-#endif /* GRAD_X */
-#ifdef GRAD_Y
- vstore8(gy, 0, ((__global short *)dst_gy.ptr));
-#endif /* GRAD_Y */
-}
diff --git a/src/core/CL/cl_kernels/tablelookup.cl b/src/core/CL/cl_kernels/tablelookup.cl
deleted file mode 100644
index 0ef1648d94..0000000000
--- a/src/core/CL/cl_kernels/tablelookup.cl
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * Copyright (c) 2016, 2017 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "helpers.h"
-
-/** This function performs table lookup on U8 input/output images.
- *
- * Global Workgroup Size [ DIV_CEIL(width, 8), height ]
- *
- *
- * @param[in] src_ptr Pointer to the source image. Supported data types: U8
- * @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] dst_ptr Pointer to the destination image. Supported data types: U8
- * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image
- * @param[in] lut LUT table. Supported data types: U8
- */
-__kernel void tablelookup_U8(
- IMAGE_DECLARATION(src),
- IMAGE_DECLARATION(dst),
- __global uchar *lut)
-{
- Image src = CONVERT_TO_IMAGE_STRUCT(src);
- Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
-
- /* Load input data */
- uchar8 data = vload8(0, src.ptr);
-
- /* Load lut data */
- uchar8 lut_data = (uchar8)(lut[data.s0], lut[data.s1], lut[data.s2], lut[data.s3],
- lut[data.s4], lut[data.s5], lut[data.s6], lut[data.s7]);
-
- /* Store result */
- vstore8(lut_data, 0, dst.ptr);
-}
-
-/** This function performs table lookup on S16 input/output images.
- *
- * Global Workgroup Size [ DIV_CEIL(width, 8), height ]
- *
- * @param[in] src_ptr Pointer to the source image. Supported data types: S16
- * @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] dst_ptr Pointer to the destination image. Supported data types: S16
- * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image
- * @param[in] lut LUT table. Supported data types: S16
- * @param[in] offset LUT offset
- * @param[in] count Number of elements in the LUT
- */
-__kernel void tablelookup_S16(
- IMAGE_DECLARATION(src),
- IMAGE_DECLARATION(dst),
- __global short *lut,
- uint offset,
- uint count)
-{
- Image src = CONVERT_TO_IMAGE_STRUCT(src);
- Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
-
- /* Load input data */
- short8 data = vload8(0, (__global short *)src.ptr);
-
- /* Load output data */
- int8 out_data = convert_int8(vload8(0, (__global short *)dst.ptr));
-
- /* Calculate index */
- int8 index = convert_int8(data) + (int8)(offset);
- int8 cond = (index >= 0 && index < (int8)count);
- index = select(0, index, cond);
-
- /* Load lut data */
- int8 lut_data = (int8)(lut[index.s0], lut[index.s1], lut[index.s2], lut[index.s3],
- lut[index.s4], lut[index.s5], lut[index.s6], lut[index.s7]);
-
- /* Select output data depending on condition */
- lut_data = select(out_data, lut_data, cond);
-
- /* Store result */
- vstore8(convert_short8(lut_data), 0, (__global short *)dst.ptr);
-}
diff --git a/src/core/CL/cl_kernels/threshold.cl b/src/core/CL/cl_kernels/threshold.cl
deleted file mode 100644
index ff3ac05ef4..0000000000
--- a/src/core/CL/cl_kernels/threshold.cl
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Copyright (c) 2016, 2017 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "helpers.h"
-
-/** Perform binary thresholding on an image.
- *
- * @param[in] in_ptr Pointer to the source image
- * @param[in] in_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] in_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] in_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] in_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] in_offset_first_element_in_bytes The offset of the first element in the first source image
- * @param[out] out_ptr Pointer to the destination image
- * @param[in] out_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] out_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] out_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] out_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] out_offset_first_element_in_bytes The offset of the first element in the destination image
- * @param[in] false_val False value
- * @param[in] true_val True value
- * @param[in] threshold The thresold value
- */
-__kernel void threshold_binary(
- IMAGE_DECLARATION(in),
- IMAGE_DECLARATION(out),
- const uchar false_val,
- const uchar true_val,
- const uchar threshold)
-{
- // Get pixels pointer
- Image in = CONVERT_TO_IMAGE_STRUCT(in);
- Image out = CONVERT_TO_IMAGE_STRUCT(out);
-
- // Load data
- uchar16 in_data = vload16(0, in.ptr);
-
- // Perform binary thresholding
- in_data = select((uchar16)false_val, (uchar16)true_val, in_data > (uchar16)threshold);
-
- // Store result
- vstore16(in_data, 0, out.ptr);
-}
-
-/** Perform range thresholding on an image.
- *
- * @param[in] in_ptr Pointer to the source image
- * @param[in] in_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] in_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] in_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] in_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] in_offset_first_element_in_bytes The offset of the first element in the first source image
- * @param[out] out_ptr Pointer to the destination image
- * @param[in] out_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] out_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] out_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] out_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] out_offset_first_element_in_bytes The offset of the first element in the destination image
- * @param[in] false_val False value
- * @param[in] true_val True value
- * @param[in] lower Lower threshold
- * @param[in] upper Upper threshold
- */
-__kernel void threshold_range(
- IMAGE_DECLARATION(in),
- IMAGE_DECLARATION(out),
- const uchar false_val,
- const uchar true_val,
- const uchar lower,
- const uchar upper)
-{
- // Get pixels pointer
- Image in = CONVERT_TO_IMAGE_STRUCT(in);
- Image out = CONVERT_TO_IMAGE_STRUCT(out);
-
- // Load data
- uchar16 in_data = vload16(0, in.ptr);
-
- // Perform range thresholding
- in_data = select((uchar16)true_val, (uchar16)false_val, in_data > (uchar16)upper || in_data < (uchar16)lower);
-
- // Store result
- vstore16(in_data, 0, out.ptr);
-}
diff --git a/src/core/CL/cl_kernels/warp_affine.cl b/src/core/CL/cl_kernels/warp_affine.cl
deleted file mode 100644
index 909b92055b..0000000000
--- a/src/core/CL/cl_kernels/warp_affine.cl
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * Copyright (c) 2016, 2017 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "helpers.h"
-#include "warp_helpers.h"
-
-/** Returns a vector of floats contaning the matrix coefficients. */
-inline const float8 build_affine_mtx()
-{
- return (float8)(MAT0, MAT1, MAT2, MAT3, MAT4, MAT5, 0, 0);
-}
-
-/** Transforms 4 2D coordinates using the formula:
- *
- * x0 = M[1][1] * x + M[1][2] * y + M[1][3]
- * y0 = M[2][1] * x + M[2][2] * y + M[2][3]
- *
- * @param[in] coord 2D coordinate to transform.
- * @param[in] mtx affine matrix
- *
- * @return a int8 containing 4 2D transformed values.
- */
-inline const float8 apply_affine_transform(const float2 coord, const float8 mtx)
-{
- const float4 in_x_coords = (float4)(coord.s0, 1 + coord.s0, 2 + coord.s0, 3 + coord.s0);
- // transform [x,x+1,x+2,x+3]
- const float4 new_x = mad(/*A*/ in_x_coords, (float4)(mtx.s0) /*B*/, mad((float4)(coord.s1), (float4)(mtx.s2), (float4)(mtx.s4)));
- // transform [y,y+1,y+2,y+3]
- const float4 new_y = mad(in_x_coords, (float4)(mtx.s1), mad((float4)(coord.s1), (float4)(mtx.s3), (float4)(mtx.s5)));
- return (float8)(new_x.s0, new_y.s0, new_x.s1, new_y.s1, new_x.s2, new_y.s2, new_x.s3, new_y.s3);
-}
-
-/** Performs an affine transform on an image interpolating with the NEAREAST NEIGHBOUR method. Input and output are single channel U8.
- *
- * This kernel performs an affine transform with a 2x3 Matrix M with this method of pixel coordinate translation:
- * x0 = M[1][1] * x + M[1][2] * y + M[1][3]
- * y0 = M[2][1] * x + M[2][2] * y + M[2][3]
- * output(x,y) = input(x0,y0)
- *
- * @attention The matrix coefficients need to be passed at compile time:\n
- * const char build_options [] = "-DMAT0=1 -DMAT1=2 -DMAT2=1 -DMAT3=2 -DMAT4=4 -DMAT5=2 "\n
- * clBuildProgram( program, 0, NULL, build_options, NULL, NULL);
- *
- * @param[in] in_ptr Pointer to the source image. Supported data types: U8.
- * @param[in] in_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] in_step_x in_stride_x * number of elements along X processed per work item (in bytes)
- * @param[in] in_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] in_step_y in_stride_y * number of elements along Y processed per work item (in bytes)
- * @param[in] in_offset_first_element_in_bytes Offset of the first element in the source image
- * @param[out] out_ptr Pointer to the destination image. Supported data types: U8.
- * @param[in] out_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] out_step_x out_stride_x * number of elements along X processed per work item (in bytes)
- * @param[in] out_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] out_step_y out_stride_y * number of elements along Y processed per work item (in bytes)
- * @param[in] out_offset_first_element_in_bytes Offset of the first element in the destination image
- * @param[in] width Width of the destination image
- * @param[in] height Height of the destination image
- */
-__kernel void warp_affine_nearest_neighbour(
- IMAGE_DECLARATION(in),
- IMAGE_DECLARATION(out),
- const int width,
- const int height)
-{
- Image in = CONVERT_TO_IMAGE_STRUCT_NO_STEP(in);
- Image out = CONVERT_TO_IMAGE_STRUCT(out);
- vstore4(read_texels4(&in, convert_int8_rtn(clamp_to_border(apply_affine_transform(get_current_coords(), build_affine_mtx()), width, height))), 0, out.ptr);
-}
-
-/** Performs an affine transform on an image interpolating with the BILINEAR method. Input and output are single channel U8.
- *
- * @attention The matrix coefficients need to be passed at compile time:\n
- * const char build_options [] = "-DMAT0=1 -DMAT1=2 -DMAT2=1 -DMAT3=2 -DMAT4=4 -DMAT5=2 "\n
- * clBuildProgram( program, 0, NULL, build_options, NULL, NULL);
- *
- * @param[in] in_ptr Pointer to the source image. Supported data types: U8.
- * @param[in] in_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] in_step_x in_stride_x * number of elements along X processed per work item (in bytes)
- * @param[in] in_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] in_step_y in_stride_y * number of elements along Y processed per work item (in bytes)
- * @param[in] in_offset_first_element_in_bytes Offset of the first element in the source image
- * @param[out] out_ptr Pointer to the destination image. Supported data types: U8.
- * @param[in] out_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] out_step_x out_stride_x * number of elements along X processed per work item (in bytes)
- * @param[in] out_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] out_step_y out_stride_y * number of elements along Y processed per work item (in bytes)
- * @param[in] out_offset_first_element_in_bytes Offset of the first element in the destination image
- * @param[in] width Width of the destination image
- * @param[in] height Height of the destination image
- */
-__kernel void warp_affine_bilinear(
- IMAGE_DECLARATION(in),
- IMAGE_DECLARATION(out),
- const int width,
- const int height)
-{
- Image in = CONVERT_TO_IMAGE_STRUCT_NO_STEP(in);
- Image out = CONVERT_TO_IMAGE_STRUCT(out);
- vstore4(bilinear_interpolate(&in, apply_affine_transform(get_current_coords(), build_affine_mtx()), width, height), 0, out.ptr);
-}
diff --git a/src/core/CL/cl_kernels/warp_perspective.cl b/src/core/CL/cl_kernels/warp_perspective.cl
deleted file mode 100644
index bed78388a4..0000000000
--- a/src/core/CL/cl_kernels/warp_perspective.cl
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * Copyright (c) 2016, 2017 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "helpers.h"
-#include "warp_helpers.h"
-
-/** Returns the perspective matrix */
-inline const float16 build_perspective_mtx()
-{
- return (float16)(MAT0, MAT1, MAT2, MAT3, MAT4, MAT5, MAT6, MAT7, MAT8, 0, 0, 0, (float4)0);
-}
-
-/** Transforms four 2D coordinates using the formula:
- *
- * x0 = M[1][1] * x + M[1][2] * y + M[1][3]
- * y0 = M[2][1] * x + M[2][2] * y + M[2][3]
- * z0 = M[3][1] * x + M[3][2] * y + M[3][3]
- *
- * (x0/z0,y0/z0)
- *
- * @param[in] coord 2D coordinate to transform.
- * @param[in] mtx perspective matrix
- *
- * @return a vector float8 containing four 2D transformed values.
- */
-inline const float8 apply_perspective_transform(const float2 coord, const float16 mtx)
-{
- const float4 in_x_coords = (float4)(coord.s0, 1 + coord.s0, 2 + coord.s0, 3 + coord.s0);
- // transform [z,z+1,z+2,z+3]
- const float4 z = (float4)mad(in_x_coords, (float4)(mtx.s2), mad((float4)(coord.s1), (float4)(mtx.s5), (float4)(mtx.s8)));
- // NOTE: Do not multiply x&y by 1.f/Z as this will result in loss of accuracy and mismatches with VX reference implementation
- // transform [x,x+1,x+2,x+3]
- const float4 new_x = (float4)mad(in_x_coords, (float4)(mtx.s0), mad((float4)(coord.s1), (float4)(mtx.s3), (float4)(mtx.s6))) / z;
- // transform [y,y+1,y+2,y+3]
- const float4 new_y = (float4)mad(in_x_coords, (float4)(mtx.s1), mad((float4)(coord.s1), (float4)(mtx.s4), (float4)(mtx.s7))) / z;
- return (float8)(new_x.s0, new_y.s0, new_x.s1, new_y.s1, new_x.s2, new_y.s2, new_x.s3, new_y.s3);
-}
-
-/** Performs perspective transformation on an image interpolating with the NEAREAST NEIGHBOUR method. Input and output are single channel U8.
- *
- * This kernel performs perspective transform with a 3x3 Matrix M with this method of pixel coordinate translation:
- * x0 = M[1][1] * x + M[1][2] * y + M[1][3]
- * y0 = M[2][1] * x + M[2][2] * y + M[2][3]
- * z0 = M[3][1] * x + M[3][2] * y + M[3][3]
- *
- * output(x,y) = input(x0/z0,y0/z0)
- *
- * @attention The matrix coefficients need to be passed at compile time:\n
- * const char build_options [] = "-DMAT0=1 -DMAT1=2 -DMAT2=3 -DMAT3=4 -DMAT4=5 -DMAT5=6 -DMAT6=7 -DMAT7=8 -DMAT8=9"\n
- * clBuildProgram( program, 0, NULL, build_options, NULL, NULL);
- *
- * @param[in] in_ptr Pointer to the source image. Supported data types: U8.
- * @param[in] in_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] in_step_x in_stride_x * number of elements along X processed per work item (in bytes)
- * @param[in] in_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] in_step_y in_stride_y * number of elements along Y processed per work item (in bytes)
- * @param[in] in_offset_first_element_in_bytes Offset of the first element in the source image
- * @param[out] out_ptr Pointer to the destination image. Supported data types: U8.
- * @param[in] out_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] out_step_x out_stride_x * number of elements along X processed per work item (in bytes)
- * @param[in] out_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] out_step_y out_stride_y * number of elements along Y processed per work item (in bytes)
- * @param[in] out_offset_first_element_in_bytes Offset of the first element in the destination image
- * @param[in] width Width of the destination image
- * @param[in] height Height of the destination image
- */
-__kernel void warp_perspective_nearest_neighbour(
- IMAGE_DECLARATION(in),
- IMAGE_DECLARATION(out),
- const int width,
- const int height)
-{
- Image in = CONVERT_TO_IMAGE_STRUCT_NO_STEP(in);
- Image out = CONVERT_TO_IMAGE_STRUCT(out);
- vstore4(read_texels4(&in, convert_int8_rtn(clamp_to_border(apply_perspective_transform(get_current_coords(), build_perspective_mtx()), width, height))), 0, out.ptr);
-}
-
-/** Performs a perspective transform on an image interpolating with the BILINEAR method. Input and output are single channel U8.
- *
- * @attention The matrix coefficients need to be passed at compile time:\n
- * const char build_options [] = "-DMAT0=1 -DMAT1=2 -DMAT2=3 -DMAT3=4 -DMAT4=5 -DMAT5=6 -DMAT6=7 -DMAT7=8 -DMAT8=9"\n
- * clBuildProgram( program, 0, NULL, build_options, NULL, NULL);
- *
- * @param[in] in_ptr Pointer to the source image. Supported data types: U8.
- * @param[in] in_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] in_step_x in_stride_x * number of elements along X processed per work item (in bytes)
- * @param[in] in_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] in_step_y in_stride_y * number of elements along Y processed per work item (in bytes)
- * @param[in] in_offset_first_element_in_bytes Offset of the first element in the source image
- * @param[out] out_ptr Pointer to the destination image. Supported data types: U8.
- * @param[in] out_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] out_step_x out_stride_x * number of elements along X processed per work item (in bytes)
- * @param[in] out_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] out_step_y out_stride_y * number of elements along Y processed per work item (in bytes)
- * @param[in] out_offset_first_element_in_bytes Offset of the first element in the destination image
- * @param[in] width Width of the destination image
- * @param[in] height Height of the destination image
- */
-__kernel void warp_perspective_bilinear(
- IMAGE_DECLARATION(in),
- IMAGE_DECLARATION(out),
- const int width,
- const int height)
-{
- Image in = CONVERT_TO_IMAGE_STRUCT_NO_STEP(in);
- Image out = CONVERT_TO_IMAGE_STRUCT(out);
- vstore4(bilinear_interpolate(&in, apply_perspective_transform(get_current_coords(), build_perspective_mtx()), width, height), 0, out.ptr);
-}
diff --git a/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp b/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp
deleted file mode 100644
index 76b60cb9f8..0000000000
--- a/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Validate.h"
-
-#include "src/core/CL/kernels/CLAbsoluteDifferenceKernel.h"
-#include "src/core/helpers/WindowHelpers.h"
-
-#include <set>
-#include <string>
-
-using namespace arm_compute;
-
-CLAbsoluteDifferenceKernel::CLAbsoluteDifferenceKernel()
- : _input1(nullptr), _input2(nullptr), _output(nullptr)
-{
-}
-
-void CLAbsoluteDifferenceKernel::configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output);
-}
-
-void CLAbsoluteDifferenceKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::S16);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::S16);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16);
- ARM_COMPUTE_ERROR_ON_MSG(output->info()->data_type() == DataType::U8 && (input1->info()->data_type() != DataType::U8 || input2->info()->data_type() != DataType::U8),
- "The output image can only be U8 if both input images are U8");
-
- _input1 = input1;
- _input2 = input2;
- _output = output;
-
- // Set kernel build options
- std::set<std::string> build_opts;
- build_opts.insert("-DDATA_TYPE_IN1=" + get_cl_type_from_data_type(input1->info()->data_type()));
- build_opts.insert("-DDATA_TYPE_IN2=" + get_cl_type_from_data_type(input2->info()->data_type()));
- build_opts.insert("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output->info()->data_type()));
-
- // Create kernel
- _kernel = create_kernel(compile_context, "absdiff", build_opts);
-
- // Configure kernel window
- constexpr unsigned int num_elems_processed_per_iteration = 16;
-
- Window win = calculate_max_window(*input1->info(), Steps(num_elems_processed_per_iteration));
-
- AccessWindowHorizontal input1_access(input1->info(), 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal input2_access(input2->info(), 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
-
- update_window_and_padding(win, input1_access, input2_access, output_access);
-
- ValidRegion valid_region = intersect_valid_regions(input1->info()->valid_region(),
- input2->info()->valid_region());
-
- output_access.set_valid_region(win, valid_region);
-
- ICLKernel::configure_internal(win);
-}
-
-void CLAbsoluteDifferenceKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- Window slice = window.first_slice_window_2D();
- do
- {
- unsigned int idx = 0;
- add_2D_tensor_argument(idx, _input1, slice);
- add_2D_tensor_argument(idx, _input2, slice);
- add_2D_tensor_argument(idx, _output, slice);
- enqueue(queue, *this, slice, lws_hint());
- }
- while(window.slide_window_slice_2D(slice));
-}
diff --git a/src/core/CL/kernels/CLAbsoluteDifferenceKernel.h b/src/core/CL/kernels/CLAbsoluteDifferenceKernel.h
deleted file mode 100644
index 28f28fe44f..0000000000
--- a/src/core/CL/kernels/CLAbsoluteDifferenceKernel.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLABSOLUTEDIFFERENCEKERNEL_H
-#define ARM_COMPUTE_CLABSOLUTEDIFFERENCEKERNEL_H
-
-#include "src/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the absolute difference kernel.
- *
- * Absolute difference is computed by:
- * @f[ output(x,y) = | input1(x,y) - input2(x,y) | @f]
- */
-class CLAbsoluteDifferenceKernel : public ICLKernel
-{
-public:
- /** Default constructor. */
- CLAbsoluteDifferenceKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLAbsoluteDifferenceKernel(const CLAbsoluteDifferenceKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLAbsoluteDifferenceKernel &operator=(const CLAbsoluteDifferenceKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLAbsoluteDifferenceKernel(CLAbsoluteDifferenceKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLAbsoluteDifferenceKernel &operator=(CLAbsoluteDifferenceKernel &&) = default;
- /** Default destructor */
- ~CLAbsoluteDifferenceKernel() = default;
-
- /** Set the inputs and output images.
- *
- * @param[in] input1 Source tensor. Data types supported: U8/S16.
- * @param[in] input2 Source tensor. Data types supported: U8/S16.
- * @param[out] output Destination tensor. Data types supported: U8/S16.
- */
- void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
- /** Set the inputs and output images.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input1 Source tensor. Data types supported: U8/S16.
- * @param[in] input2 Source tensor. Data types supported: U8/S16.
- * @param[out] output Destination tensor. Data types supported: U8/S16.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input1; /**< Source tensor 1. */
- const ICLTensor *_input2; /**< Source tensor 2. */
- ICLTensor *_output; /**< Destination tensor. */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLABSOLUTEDIFFERENCEKERNEL_H */
diff --git a/src/core/CL/kernels/CLAccumulateKernel.cpp b/src/core/CL/kernels/CLAccumulateKernel.cpp
deleted file mode 100644
index b0a8eba644..0000000000
--- a/src/core/CL/kernels/CLAccumulateKernel.cpp
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/core/CL/kernels/CLAccumulateKernel.h"
-
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/IAccessWindow.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
-
-namespace arm_compute
-{
-namespace
-{
-constexpr unsigned int num_elems_processed_per_iteration = 16;
-} // namespace
-
-void CLAccumulateKernel::configure(const ICLTensor *input, ICLTensor *accum)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, accum);
-}
-
-void CLAccumulateKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *accum)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(accum, 1, DataType::S16);
-
- // Create kernel
- _kernel = create_kernel(compile_context, "accumulate");
-
- // Make sure _kernel is initialized before calling the parent's configure
- ICLSimple2DKernel::configure(input, accum, num_elems_processed_per_iteration);
-}
-
-void CLAccumulateWeightedKernel::configure(const ICLTensor *input, float alpha, ICLTensor *accum)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, alpha, accum);
-}
-
-void CLAccumulateWeightedKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, float alpha, ICLTensor *accum)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(accum, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON(alpha < 0.0 || alpha > 1.0);
-
- // Create kernel
- _kernel = create_kernel(compile_context, "accumulate_weighted");
-
- // Set static kernel arguments
- unsigned int idx = 2 * num_arguments_per_2D_tensor(); //Skip the input and output parameters
- _kernel.setArg(idx++, alpha);
-
- // Configure kernel window
- ICLSimple2DKernel::configure(input, accum, num_elems_processed_per_iteration);
-}
-
-void CLAccumulateSquaredKernel::configure(const ICLTensor *input, uint32_t shift, ICLTensor *accum)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, shift, accum);
-}
-
-void CLAccumulateSquaredKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, uint32_t shift, ICLTensor *accum)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(accum, 1, DataType::S16);
- ARM_COMPUTE_ERROR_ON(shift > 15);
-
- // Create kernel
- _kernel = create_kernel(compile_context, "accumulate_squared");
-
- // Set static kernel arguments
- unsigned int idx = 2 * num_arguments_per_2D_tensor(); //Skip the input and output parameters
- _kernel.setArg(idx++, shift);
-
- // Configure kernel window
- ICLSimple2DKernel::configure(input, accum, num_elems_processed_per_iteration);
-}
-} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLAccumulateKernel.h b/src/core/CL/kernels/CLAccumulateKernel.h
deleted file mode 100644
index 16a715319d..0000000000
--- a/src/core/CL/kernels/CLAccumulateKernel.h
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLACCUMULATEKERNEL_H
-#define ARM_COMPUTE_CLACCUMULATEKERNEL_H
-
-#include "src/core/CL/ICLSimple2DKernel.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the accumulate kernel.
- *
- * Accumulation is computed by:
- * @f[ accum(x,y) = accum(x,y) + input(x,y) @f]
- */
-class CLAccumulateKernel : public ICLSimple2DKernel
-{
-public:
- /** Set the input and accumulation tensors.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] accum Destination tensor. Data types supported: S16.
- */
- void configure(const ICLTensor *input, ICLTensor *accum);
- /** Set the input and accumulation tensors.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] accum Destination tensor. Data types supported: S16.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *accum);
-};
-
-/** Interface for the accumulate weighted kernel.
- *
- * Weighted accumulation is computed:
- * @f[ accum(x,y) = (1 - \alpha)*accum(x,y) + \alpha*input(x,y) @f]
- *
- * Where @f$ 0 \le \alpha \le 1 @f$
- * Conceptually, the rounding for this is defined as:
- * @f[ output(x,y)= uint8( (1 - \alpha) * float32( int32( output(x,y) ) ) + \alpha * float32( int32( input(x,y) ) ) ) @f]
-*/
-class CLAccumulateWeightedKernel : public ICLSimple2DKernel
-{
-public:
- /** Set the input and accumulation images, and the scale value.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[in] alpha Scalar value in the range [0, 1.0]. Data types supported: F32.
- * @param[in,out] accum Accumulated tensor. Data types supported: U8.
- */
- void configure(const ICLTensor *input, float alpha, ICLTensor *accum);
- /** Set the input and accumulation images, and the scale value.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[in] alpha Scalar value in the range [0, 1.0]. Data types supported: F32.
- * @param[in,out] accum Accumulated tensor. Data types supported: U8.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, float alpha, ICLTensor *accum);
-};
-
-/** Interface for the accumulate squared kernel.
- *
- * The accumulation of squares is computed:
- * @f[ accum(x,y) = saturate_{int16} ( (uint16) accum(x,y) + (((uint16)(input(x,y)^2)) >> (shift)) ) @f]
- *
- * Where @f$ 0 \le shift \le 15 @f$
-*/
-class CLAccumulateSquaredKernel : public ICLSimple2DKernel
-{
-public:
- /** Set the input and accumulation tensors and the shift value.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[in] shift Shift value in the range of [0, 15]. Data types supported: U32.
- * @param[in,out] accum Accumulated tensor. Data types supported: S16.
- */
- void configure(const ICLTensor *input, uint32_t shift, ICLTensor *accum);
- /** Set the input and accumulation tensors and the shift value.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[in] shift Shift value in the range of [0, 15]. Data types supported: U32.
- * @param[in,out] accum Accumulated tensor. Data types supported: S16.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, uint32_t shift, ICLTensor *accum);
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLACCUMULATEKERNEL_H */
diff --git a/src/core/CL/kernels/CLBox3x3Kernel.cpp b/src/core/CL/kernels/CLBox3x3Kernel.cpp
deleted file mode 100644
index 9f493b4fb8..0000000000
--- a/src/core/CL/kernels/CLBox3x3Kernel.cpp
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/core/CL/kernels/CLBox3x3Kernel.h"
-
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Validate.h"
-#include "src/core/helpers/WindowHelpers.h"
-
-#include <set>
-#include <string>
-
-using namespace arm_compute;
-
-BorderSize CLBox3x3Kernel::border_size() const
-{
- return BorderSize(1);
-}
-
-void CLBox3x3Kernel::configure(const ICLTensor *input, ICLTensor *output, bool border_undefined)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output, border_undefined);
-}
-
-void CLBox3x3Kernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
-
- _input = input;
- _output = output;
-
- // Set build options
- std::set<std::string> build_opts = { "-DMAT0=1", "-DMAT1=1", "-DMAT2=1",
- "-DMAT3=1", "-DMAT4=1", "-DMAT5=1",
- "-DMAT6=1", "-DMAT7=1", "-DMAT8=1",
- "-DSCALE=9", "-DDATA_TYPE_OUT=uchar"
- };
-
- // Create kernel
- _kernel = create_kernel(compile_context, "convolution3x3_static", build_opts);
-
- // Configure kernel window
- constexpr unsigned int num_elems_processed_per_iteration = 8;
- constexpr unsigned int num_elems_read_per_iteration = 16;
- constexpr unsigned int num_elems_written_per_iteration = 8;
- constexpr unsigned int num_rows_read_per_iteration = 3;
-
- Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size());
-
- AccessWindowRectangle input_access(input->info(), -border_size().left, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration);
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration);
-
- update_window_and_padding(win, input_access, output_access);
-
- output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
-
- ICLKernel::configure_internal(win);
-}
diff --git a/src/core/CL/kernels/CLBox3x3Kernel.h b/src/core/CL/kernels/CLBox3x3Kernel.h
deleted file mode 100644
index 2373c4a928..0000000000
--- a/src/core/CL/kernels/CLBox3x3Kernel.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLBOX3X3KERNEL_H
-#define ARM_COMPUTE_CLBOX3X3KERNEL_H
-
-#include "src/core/CL/ICLSimple2DKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the box 3x3 filter kernel.
- *
- */
-class CLBox3x3Kernel : public ICLSimple2DKernel
-{
-public:
- /**Initialise the kernel's input and output.
- *
- * @param[in] input An input tensor. Data types supported: U8
- * @param[out] output The output tensor. Data types supported: U8.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
- /**Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input An input tensor. Data types supported: U8
- * @param[out] output The output tensor. Data types supported: U8.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined);
-
- //Inherited methods overriden:
- BorderSize border_size() const override;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLBOX3X3KERNEL_H */
diff --git a/src/core/CL/kernels/CLCannyEdgeKernel.cpp b/src/core/CL/kernels/CLCannyEdgeKernel.cpp
deleted file mode 100644
index 1fe944c8a2..0000000000
--- a/src/core/CL/kernels/CLCannyEdgeKernel.cpp
+++ /dev/null
@@ -1,310 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/core/CL/kernels/CLCannyEdgeKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Validate.h"
-#include "src/core/helpers/WindowHelpers.h"
-#include "support/StringSupport.h"
-
-using namespace arm_compute;
-
-CLGradientKernel::CLGradientKernel()
- : _gx(nullptr), _gy(nullptr), _magnitude(nullptr), _phase(nullptr)
-{
-}
-
-void CLGradientKernel::configure(const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, int32_t norm_type)
-{
- configure(CLKernelLibrary::get().get_compile_context(), gx, gy, magnitude, phase, norm_type);
-}
-
-void CLGradientKernel::configure(const CLCompileContext &compile_context, const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, int32_t norm_type)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(gx, 1, DataType::S16, DataType::S32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(gy, 1, DataType::S16, DataType::S32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(magnitude, 1, DataType::U16, DataType::U32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(phase, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON_MSG(data_size_from_type(gx->info()->data_type()) != data_size_from_type(gy->info()->data_type()),
- "Gx and Gy must have the same pixel size");
- ARM_COMPUTE_ERROR_ON_MSG(data_size_from_type(gx->info()->data_type()) != data_size_from_type(magnitude->info()->data_type()),
- "Mag must have the same pixel size as Gx and Gy");
-
- _gx = gx;
- _gy = gy;
- _magnitude = magnitude;
- _phase = phase;
-
- // Create build opts
- std::set<std::string> built_opts;
- built_opts.emplace("-DDATA_TYPE_IN=" + get_cl_type_from_data_type(gx->info()->data_type()));
- built_opts.emplace("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(gx->info()->data_type()));
-
- // Create kernel
- const std::string kernel_name = (norm_type == 1) ? std::string("combine_gradients_L1") : std::string("combine_gradients_L2");
- _kernel = create_kernel(compile_context, kernel_name, built_opts);
-
- // Configure kernel window
- constexpr unsigned int num_elems_processed_per_iteration = 4;
-
- Window win = calculate_max_window(*_gx->info(), Steps(num_elems_processed_per_iteration));
-
- AccessWindowHorizontal gx_access(_gx->info(), 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal gy_access(_gy->info(), 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal mag_access(_magnitude->info(), 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal phase_access(_phase->info(), 0, num_elems_processed_per_iteration);
-
- update_window_and_padding(win, gx_access, gy_access, mag_access, phase_access);
-
- mag_access.set_valid_region(win, _gx->info()->valid_region());
- phase_access.set_valid_region(win, _gx->info()->valid_region());
-
- ICLKernel::configure_internal(win);
-
- // Set config_id for enabling LWS tuning
- _config_id = kernel_name;
- _config_id += "_";
- _config_id += lower_string(string_from_data_type(gx->info()->data_type()));
- _config_id += "_";
- _config_id += support::cpp11::to_string(gx->info()->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(gx->info()->dimension(1));
-}
-
-void CLGradientKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
-
- Window slice = window.first_slice_window_2D();
- do
- {
- unsigned int idx = 0;
- add_2D_tensor_argument(idx, _gx, slice);
- add_2D_tensor_argument(idx, _gy, slice);
- add_2D_tensor_argument(idx, _magnitude, slice);
- add_2D_tensor_argument(idx, _phase, slice);
- enqueue(queue, *this, slice, lws_hint());
- }
- while(window.slide_window_slice_2D(slice));
-}
-
-CLEdgeNonMaxSuppressionKernel::CLEdgeNonMaxSuppressionKernel()
- : _magnitude(nullptr), _phase(nullptr), _output(nullptr)
-{
-}
-
-BorderSize CLEdgeNonMaxSuppressionKernel::border_size() const
-{
- return BorderSize(1);
-}
-
-void CLEdgeNonMaxSuppressionKernel::configure(const ICLTensor *magnitude, const ICLTensor *phase, ICLTensor *output, int32_t lower_thr, bool border_undefined)
-{
- configure(CLKernelLibrary::get().get_compile_context(), magnitude, phase, output, lower_thr, border_undefined);
-}
-
-void CLEdgeNonMaxSuppressionKernel::configure(const CLCompileContext &compile_context, const ICLTensor *magnitude, const ICLTensor *phase, ICLTensor *output, int32_t lower_thr, bool border_undefined)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(magnitude, 1, DataType::U16, DataType::U32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(phase, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U16, DataType::U32);
-
- _magnitude = magnitude;
- _phase = phase;
- _output = output;
-
- // Create build opts
- std::set<std::string> built_opts;
- built_opts.emplace("-DDATA_TYPE_IN=" + get_cl_type_from_data_type(magnitude->info()->data_type()));
- built_opts.emplace("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output->info()->data_type()));
-
- // Create kernel
- const std::string kernel_name = std::string("suppress_non_maximum");
- _kernel = create_kernel(compile_context, kernel_name, built_opts);
-
- // Set minimum threshold argument
- unsigned int idx = 3 * num_arguments_per_2D_tensor(); //Skip the input and output parameters
- _kernel.setArg(idx++, lower_thr);
-
- // Configure kernel window
- constexpr unsigned int num_elems_processed_per_iteration = 1;
- constexpr unsigned int num_elems_read_written_per_iteration = 3;
-
- Window win = calculate_max_window(*_magnitude->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size());
-
- AccessWindowRectangle mag_access(_magnitude->info(), -border_size().left, -border_size().top,
- num_elems_read_written_per_iteration, num_elems_read_written_per_iteration);
- AccessWindowHorizontal phase_access(_phase->info(), 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal output_access(_output->info(), 0, num_elems_processed_per_iteration);
-
- update_window_and_padding(win, mag_access, phase_access, output_access);
-
- output_access.set_valid_region(win, _magnitude->info()->valid_region(), border_undefined, border_size());
-
- ICLKernel::configure_internal(win);
-
- // Set config_id for enabling LWS tuning
- _config_id = kernel_name;
- _config_id += "_";
- _config_id += lower_string(string_from_data_type(output->info()->data_type()));
- _config_id += "_";
- _config_id += support::cpp11::to_string(output->info()->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(output->info()->dimension(1));
- _config_id += "_";
- _config_id += support::cpp11::to_string(border_undefined);
-}
-
-void CLEdgeNonMaxSuppressionKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
-
- Window slice = window.first_slice_window_2D();
- do
- {
- unsigned int idx = 0;
- add_2D_tensor_argument(idx, _magnitude, slice);
- add_2D_tensor_argument(idx, _phase, slice);
- add_2D_tensor_argument(idx, _output, slice);
- enqueue(queue, *this, slice, lws_hint());
- }
- while(window.slide_window_slice_2D(slice));
-}
-
-CLEdgeTraceKernel::CLEdgeTraceKernel()
- : _input(nullptr), _output(nullptr), _lower_thr(0), _upper_thr(0), _visited(nullptr), _recorded(nullptr), _l1_stack(nullptr), _l1_stack_counter(nullptr)
-{
-}
-
-void CLEdgeTraceKernel::configure(const ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr,
- ICLTensor *visited, ICLTensor *recorded, ICLTensor *l1_stack, ICLTensor *l1_stack_counter)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output, upper_thr, lower_thr, visited, recorded, l1_stack, l1_stack_counter);
-}
-
-void CLEdgeTraceKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr,
- ICLTensor *visited, ICLTensor *recorded, ICLTensor *l1_stack, ICLTensor *l1_stack_counter)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U16, DataType::U32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(visited, 1, DataType::U32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(recorded, 1, DataType::U32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(l1_stack, 1, DataType::S32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(l1_stack_counter, 1, DataType::U8);
-
- _input = input;
- _output = output;
- _lower_thr = lower_thr;
- _upper_thr = upper_thr;
- _visited = visited;
- _recorded = recorded;
- _l1_stack = l1_stack;
- _l1_stack_counter = l1_stack_counter;
-
- // Create build opts
- std::set<std::string> built_opts;
- built_opts.emplace("-DDATA_TYPE_IN=" + get_cl_type_from_data_type(input->info()->data_type()));
- built_opts.emplace("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output->info()->data_type()));
-
- // Create kernel
- const std::string kernel_name = std::string("hysteresis");
- _kernel = create_kernel(compile_context, kernel_name, built_opts);
-
- // Set constant kernel args
- unsigned int width = _input->info()->dimension(0);
- unsigned int height = _input->info()->dimension(1);
- unsigned int idx = 6 * num_arguments_per_2D_tensor(); //Skip the input and output parameters
- _kernel.setArg(idx++, static_cast<cl_uint>(_lower_thr));
- _kernel.setArg(idx++, static_cast<cl_uint>(_upper_thr));
- _kernel.setArg(idx++, static_cast<cl_uint>(width));
- _kernel.setArg(idx++, static_cast<cl_uint>(height));
-
- // Configure kernel window
- constexpr unsigned int num_elems_processed_per_iteration = 1;
- Window win = calculate_max_window(*_input->info(), Steps(num_elems_processed_per_iteration));
-
- AccessWindowHorizontal output_access(_output->info(), 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal visited_access(_visited->info(), 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal recorded_access(_recorded->info(), 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal l1_stack_access(_l1_stack->info(), 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal l1_stack_counter_access(_l1_stack_counter->info(), 0, num_elems_processed_per_iteration);
-
- update_window_and_padding(win,
- AccessWindowHorizontal(_input->info(), 0, num_elems_processed_per_iteration),
- output_access,
- visited_access,
- recorded_access,
- l1_stack_access,
- l1_stack_counter_access);
-
- output_access.set_valid_region(win, _input->info()->valid_region());
- visited_access.set_valid_region(win, _input->info()->valid_region());
- recorded_access.set_valid_region(win, _input->info()->valid_region());
- l1_stack_access.set_valid_region(win, _input->info()->valid_region());
- l1_stack_counter_access.set_valid_region(win, _input->info()->valid_region());
-
- ICLKernel::configure_internal(win);
-
- // Set config_id for enabling LWS tuning
- _config_id = kernel_name;
- _config_id += "_";
- _config_id += lower_string(string_from_data_type(input->info()->data_type()));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(1));
- _config_id += "_";
- _config_id += lower_string(string_from_format(output->info()->format()));
- _config_id += "_";
- _config_id += support::cpp11::to_string(output->info()->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(output->info()->dimension(1));
-}
-
-void CLEdgeTraceKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
-
- Window slice = window.first_slice_window_2D();
- do
- {
- unsigned int idx = 0;
- add_2D_tensor_argument(idx, _input, slice);
- add_2D_tensor_argument(idx, _output, slice);
- add_2D_tensor_argument(idx, _visited, slice);
- add_2D_tensor_argument(idx, _recorded, slice);
- add_2D_tensor_argument(idx, _l1_stack, slice);
- add_2D_tensor_argument(idx, _l1_stack_counter, slice);
-
- enqueue(queue, *this, slice, lws_hint());
- }
- while(window.slide_window_slice_2D(slice));
-}
diff --git a/src/core/CL/kernels/CLCannyEdgeKernel.h b/src/core/CL/kernels/CLCannyEdgeKernel.h
deleted file mode 100644
index 7543822d8d..0000000000
--- a/src/core/CL/kernels/CLCannyEdgeKernel.h
+++ /dev/null
@@ -1,185 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLCANNYEDGEKERNEL_H
-#define ARM_COMPUTE_CLCANNYEDGEKERNEL_H
-
-#include "src/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to perform Gradient computation.
- */
-class CLGradientKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLGradientKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGradientKernel(const CLGradientKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGradientKernel &operator=(const CLGradientKernel &) = delete;
- /** Initialise the kernel's sources, destinations and border mode.
- *
- * @note gx, gy and mag must all be the same size (either 16 or 32).
- *
- * @param[in] gx Source tensor - Gx component. Data types supported: S16/S32.
- * @param[in] gy Source tensor - Gy component. Data types supported: Same as gx.
- * @param[out] magnitude Destination tensor - Magnitude. Data types supported: U16/U32. Must match the pixel size of gx, gy.
- * @param[out] phase Destination tensor - Quantized phase. Data types supported: U8.
- * @param[in] norm_type Normalization type. if 1, L1-Norm otherwise L2-Norm.
- */
- void configure(const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, int32_t norm_type);
- /** Initialise the kernel's sources, destinations and border mode.
- *
- * @note gx, gy and mag must all be the same size (either 16 or 32).
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] gx Source tensor - Gx component. Data types supported: S16/S32.
- * @param[in] gy Source tensor - Gy component. Data types supported: Same as gx.
- * @param[out] magnitude Destination tensor - Magnitude. Data types supported: U16/U32. Must match the pixel size of gx, gy.
- * @param[out] phase Destination tensor - Quantized phase. Data types supported: U8.
- * @param[in] norm_type Normalization type. if 1, L1-Norm otherwise L2-Norm.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, int32_t norm_type);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_gx; /**< Source tensor - Gx component */
- const ICLTensor *_gy; /**< Source tensor - Gy component */
- ICLTensor *_magnitude; /**< Destination tensor - Magnitude */
- ICLTensor *_phase; /**< Destination tensor - Quantized phase */
-};
-
-/** OpenCL kernel to perform Non-Maxima suppression for Canny Edge.
- *
- * @note This kernel is meant to be used alongside CannyEdge and performs a non-maxima suppression using magnitude and phase of input
- * to characterize points as possible edges. The output buffer needs to be cleared before this kernel is executed.
- *
- * @note Hysteresis is computed in @ref CLEdgeTraceKernel
- */
-class CLEdgeNonMaxSuppressionKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLEdgeNonMaxSuppressionKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLEdgeNonMaxSuppressionKernel(const CLEdgeNonMaxSuppressionKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLEdgeNonMaxSuppressionKernel &operator=(const CLEdgeNonMaxSuppressionKernel &) = delete;
- /** Initialise the kernel's sources, destination and border mode.
- *
- * @param[in] magnitude Source tensor - Magnitude. Data types supported: U16/U32.
- * @param[in] phase Source tensor - Quantized phase. Data types supported: U8.
- * @param[out] output Destination tensor. Data types supported: U16/U32.
- * @param[in] lower_thr Lower threshold.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *magnitude, const ICLTensor *phase, ICLTensor *output, int32_t lower_thr, bool border_undefined);
- /** Initialise the kernel's sources, destination and border mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] magnitude Source tensor - Magnitude. Data types supported: U16/U32.
- * @param[in] phase Source tensor - Quantized phase. Data types supported: U8.
- * @param[out] output Destination tensor. Data types supported: U16/U32.
- * @param[in] lower_thr Lower threshold.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *magnitude, const ICLTensor *phase, ICLTensor *output, int32_t lower_thr, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- const ICLTensor *_magnitude; /**< Source tensor - Magnitude. */
- const ICLTensor *_phase; /**< Source tensor - Quantized phase. */
- ICLTensor *_output; /**< Destination tensor. */
-};
-
-/** OpenCL kernel to perform Edge tracing.
- */
-class CLEdgeTraceKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLEdgeTraceKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLEdgeTraceKernel(const CLEdgeTraceKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLEdgeTraceKernel &operator=(const CLEdgeTraceKernel &) = delete;
- /** Initialise the kernel's source, destination and border mode.
- *
- * @param[in] input Source tensor. Data types supported: U16/U32.
- * @param[out] output Destination tensor. Data types supported: U8.
- * @param[in] upper_thr Upper threshold used for the hysteresis
- * @param[in] lower_thr Lower threshold used for the hysteresis
- * @param[in,out] visited Tensor for keeping the visited pixels. Data types supported: U32.
- * Expected to be initialized to 0 before each run.
- * @param[in,out] recorded Tensor for keeping the recorded pixels. Data types supported: U32
- * Expected to be initialized to 0 before each run.
- * @param[in,out] l1_stack Tensor with the L1 stack for each pixel. Data types supported: S32.
- * Expected to be initialized to 0 before each run.
- * @param[in,out] l1_stack_counter Tensor for counting the elements in the L1 stack of each pixel. Data types supported: U8.
- * Expected to be initialized to 0 before each run.
- */
- void configure(const ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr,
- ICLTensor *visited, ICLTensor *recorded, ICLTensor *l1_stack, ICLTensor *l1_stack_counter);
- /** Initialise the kernel's source, destination and border mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U16/U32.
- * @param[out] output Destination tensor. Data types supported: U8.
- * @param[in] upper_thr Upper threshold used for the hysteresis
- * @param[in] lower_thr Lower threshold used for the hysteresis
- * @param[in,out] visited Tensor for keeping the visited pixels. Data types supported: U32.
- * Expected to be initialized to 0 before each run.
- * @param[in,out] recorded Tensor for keeping the recorded pixels. Data types supported: U32
- * Expected to be initialized to 0 before each run.
- * @param[in,out] l1_stack Tensor with the L1 stack for each pixel. Data types supported: S32.
- * Expected to be initialized to 0 before each run.
- * @param[in,out] l1_stack_counter Tensor for counting the elements in the L1 stack of each pixel. Data types supported: U8.
- * Expected to be initialized to 0 before each run.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr,
- ICLTensor *visited, ICLTensor *recorded, ICLTensor *l1_stack, ICLTensor *l1_stack_counter);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input; /**< Source tensor. */
- ICLTensor *_output; /**< Destination tensor. */
- int32_t _lower_thr; /**< Lower threshold used for the hysteresis. */
- int32_t _upper_thr; /**< Upper threshold used for the hysteresis. */
- ICLTensor *_visited; /**< Marks visited elements */
- ICLTensor *_recorded; /**< Marks recorded elements */
- ICLTensor *_l1_stack; /**< L1 hysteris stack */
- ICLTensor *_l1_stack_counter; /**< L1 hysteris stack counter */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLCANNYEDGEKERNEL_H */
diff --git a/src/core/CL/kernels/CLChannelCombineKernel.cpp b/src/core/CL/kernels/CLChannelCombineKernel.cpp
deleted file mode 100644
index 52ba9dd065..0000000000
--- a/src/core/CL/kernels/CLChannelCombineKernel.cpp
+++ /dev/null
@@ -1,296 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/core/CL/kernels/CLChannelCombineKernel.h"
-
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLMultiImage.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/MultiImageInfo.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-#include "src/core/helpers/WindowHelpers.h"
-
-#include <set>
-#include <string>
-
-namespace arm_compute
-{
-namespace
-{
-constexpr unsigned int num_elems_processed_per_iteration = 16;
-} // namespace
-
-CLChannelCombineKernel::CLChannelCombineKernel()
- : _planes{ { nullptr } }, _output(nullptr), _output_multi(nullptr), _x_subsampling{ { 1, 1, 1 } }, _y_subsampling{ { 1, 1, 1 } }
-{
-}
-
-void CLChannelCombineKernel::configure(const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output)
-{
- configure(CLKernelLibrary::get().get_compile_context(), plane0, plane1, plane2, plane3, output);
-}
-
-void CLChannelCombineKernel::configure(const CLCompileContext &compile_context, const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(plane0, plane1, plane2, output);
- ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(plane0);
- ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(plane1);
- ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(plane2);
- ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(output);
-
- ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(plane0, Format::U8);
- ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(plane1, Format::U8);
- ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(plane2, Format::U8);
- ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(output, Format::RGB888, Format::RGBA8888, Format::YUYV422, Format::UYVY422);
-
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(plane0, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(plane1, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(plane2, 1, DataType::U8);
-
- const Format output_format = output->info()->format();
-
- // Check if horizontal dimension of Y plane is even and validate horizontal sub-sampling dimensions for U and V planes
- if(Format::YUYV422 == output_format || Format::UYVY422 == output_format)
- {
- // Validate Y plane of input and output
- ARM_COMPUTE_ERROR_ON_TENSORS_NOT_EVEN(output_format, plane0, output);
-
- // Validate U and V plane of the input
- ARM_COMPUTE_ERROR_ON_TENSORS_NOT_SUBSAMPLED(output_format, plane0->info()->tensor_shape(), plane1, plane2);
- }
-
- _planes[0] = plane0;
- _planes[1] = plane1;
- _planes[2] = plane2;
- _planes[3] = nullptr;
-
- // Validate the last input tensor only for RGBA format
- if(Format::RGBA8888 == output_format)
- {
- ARM_COMPUTE_ERROR_ON_NULLPTR(plane3);
- ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(plane3);
-
- ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(plane3, Format::U8);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(plane3, 1, DataType::U8);
-
- _planes[3] = plane3;
- }
-
- _output = output;
- _output_multi = nullptr;
-
- // Half the processed elements for U and V channels due to horizontal sub-sampling of 2
- if(Format::YUYV422 == output_format || Format::UYVY422 == output_format)
- {
- _x_subsampling[1] = 2;
- _x_subsampling[2] = 2;
- }
-
- // Create kernel
- std::string kernel_name = "channel_combine_" + string_from_format(output_format);
- _kernel = create_kernel(compile_context, kernel_name);
-
- // Configure window
- Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration));
-
- AccessWindowHorizontal plane0_access(plane0->info(), 0, num_elems_processed_per_iteration);
- AccessWindowRectangle plane1_access(plane1->info(), 0, 0, num_elems_processed_per_iteration, 1, 1.f / _x_subsampling[1], 1.f / _y_subsampling[1]);
- AccessWindowRectangle plane2_access(plane2->info(), 0, 0, num_elems_processed_per_iteration, 1, 1.f / _x_subsampling[2], 1.f / _y_subsampling[2]);
- AccessWindowHorizontal plane3_access(plane3 == nullptr ? nullptr : plane3->info(), 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
-
- update_window_and_padding(win, plane0_access, plane1_access, plane2_access, plane3_access, output_access);
-
- ValidRegion valid_region = intersect_valid_regions(plane0->info()->valid_region(),
- plane1->info()->valid_region(),
- plane2->info()->valid_region());
- if(plane3 != nullptr)
- {
- valid_region = intersect_valid_regions(plane3->info()->valid_region(), valid_region);
- }
- output_access.set_valid_region(win, ValidRegion(valid_region.anchor, output->info()->tensor_shape()));
-
- ICLKernel::configure_internal(win);
-}
-
-void CLChannelCombineKernel::configure(const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output)
-{
- configure(CLKernelLibrary::get().get_compile_context(), plane0, plane1, plane2, output);
-}
-
-void CLChannelCombineKernel::configure(const CLCompileContext &compile_context, const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(plane0, plane1, plane2, output);
- ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(plane0);
- ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(plane1);
- ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(plane2);
-
- ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(plane0, Format::U8);
- ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(plane1, Format::U8);
- ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(plane2, Format::U8);
- ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(output, Format::NV12, Format::NV21, Format::IYUV, Format::YUV444);
-
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(plane0, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(plane1, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(plane2, 1, DataType::U8);
-
- const Format output_format = output->info()->format();
-
- // Validate shape of Y plane to be even and shape of sub-sampling dimensions for U and V planes
- // Perform validation only for formats which require sub-sampling.
- if(Format::YUV444 != output_format)
- {
- // Validate Y plane of input and output
- ARM_COMPUTE_ERROR_ON_TENSORS_NOT_EVEN(output_format, plane0, output->plane(0));
-
- // Validate U and V plane of the input
- ARM_COMPUTE_ERROR_ON_TENSORS_NOT_SUBSAMPLED(output_format, plane0->info()->tensor_shape(), plane1, plane2);
-
- // Validate second plane U (NV12 and NV21 have a UV88 combined plane while IYUV has only the U plane)
- // MultiImage generates the correct tensor shape but also check in case the tensor shape of planes was changed to a wrong size
- ARM_COMPUTE_ERROR_ON_TENSORS_NOT_SUBSAMPLED(output_format, plane0->info()->tensor_shape(), output->plane(1));
-
- // Validate the last plane V of format IYUV
- if(Format::IYUV == output_format)
- {
- // Validate Y plane of the output
- ARM_COMPUTE_ERROR_ON_TENSORS_NOT_SUBSAMPLED(output_format, plane0->info()->tensor_shape(), output->plane(2));
- }
- }
-
- // Set input tensors
- _planes[0] = plane0;
- _planes[1] = plane1;
- _planes[2] = plane2;
- _planes[3] = nullptr;
-
- // Set output tensor
- _output = nullptr;
- _output_multi = output;
-
- bool has_two_planars = false;
-
- // Set sub-sampling parameters for each plane
- std::string kernel_name;
- std::set<std::string> build_opts;
-
- if(Format::NV12 == output_format || Format::NV21 == output_format)
- {
- _x_subsampling = { { 1, 2, 2 } };
- _y_subsampling = { { 1, 2, 2 } };
- kernel_name = "channel_combine_NV";
- build_opts.emplace(Format::NV12 == output_format ? "-DNV12" : "-DNV21");
- has_two_planars = true;
- }
- else
- {
- if(Format::IYUV == output_format)
- {
- _x_subsampling = { { 1, 2, 2 } };
- _y_subsampling = { { 1, 2, 2 } };
- }
-
- kernel_name = "copy_planes_3p";
- build_opts.emplace(Format::IYUV == output_format ? "-DIYUV" : "-DYUV444");
- }
-
- // Create kernel
- _kernel = create_kernel(compile_context, kernel_name, build_opts);
-
- // Configure window
- Window win = calculate_max_window(*plane0->info(), Steps(num_elems_processed_per_iteration));
-
- AccessWindowRectangle input_plane0_access(plane0->info(), 0, 0, num_elems_processed_per_iteration, 1.f);
- AccessWindowRectangle input_plane1_access(plane1->info(), 0, 0, num_elems_processed_per_iteration, 1.f, 1.f / _x_subsampling[1], 1.f / _y_subsampling[1]);
- AccessWindowRectangle input_plane2_access(plane2->info(), 0, 0, num_elems_processed_per_iteration, 1.f, 1.f / _x_subsampling[2], 1.f / _y_subsampling[2]);
- AccessWindowRectangle output_plane0_access(output->plane(0)->info(), 0, 0, num_elems_processed_per_iteration, 1.f, 1.f, 1.f / _y_subsampling[1]);
- AccessWindowRectangle output_plane1_access(output->plane(1)->info(), 0, 0, num_elems_processed_per_iteration, 1.f, 1.f / _x_subsampling[1], 1.f / _y_subsampling[1]);
- AccessWindowRectangle output_plane2_access(has_two_planars ? nullptr : output->plane(2)->info(), 0, 0, num_elems_processed_per_iteration, 1.f, 1.f / _x_subsampling[2], 1.f / _y_subsampling[2]);
-
- update_window_and_padding(win,
- input_plane0_access, input_plane1_access, input_plane2_access,
- output_plane0_access, output_plane1_access, output_plane2_access);
-
- ValidRegion plane0_valid_region = plane0->info()->valid_region();
- ValidRegion output_plane1_region = has_two_planars ? intersect_valid_regions(plane1->info()->valid_region(), plane2->info()->valid_region()) : plane2->info()->valid_region();
- output_plane0_access.set_valid_region(win, ValidRegion(plane0_valid_region.anchor, output->plane(0)->info()->tensor_shape()));
- output_plane1_access.set_valid_region(win, ValidRegion(output_plane1_region.anchor, output->plane(1)->info()->tensor_shape()));
- output_plane2_access.set_valid_region(win, ValidRegion(plane2->info()->valid_region().anchor, output->plane(2)->info()->tensor_shape()));
-
- ICLKernel::configure_internal(win);
-}
-
-void CLChannelCombineKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- Window slice = window.first_slice_window_2D();
- slice.set_dimension_step(Window::DimY, 1);
-
- do
- {
- // Subsampling in plane 1
- Window win_sub_plane1(slice);
- win_sub_plane1.set(Window::DimX, Window::Dimension(win_sub_plane1.x().start() / _x_subsampling[1], win_sub_plane1.x().end() / _x_subsampling[1], win_sub_plane1.x().step() / _x_subsampling[1]));
- win_sub_plane1.set(Window::DimY, Window::Dimension(win_sub_plane1.y().start() / _y_subsampling[1], win_sub_plane1.y().end() / _y_subsampling[1], 1));
-
- // Subsampling in plane 2
- Window win_sub_plane2(slice);
- win_sub_plane2.set(Window::DimX, Window::Dimension(win_sub_plane2.x().start() / _x_subsampling[2], win_sub_plane2.x().end() / _x_subsampling[2], win_sub_plane2.x().step() / _x_subsampling[2]));
- win_sub_plane2.set(Window::DimY, Window::Dimension(win_sub_plane2.y().start() / _y_subsampling[2], win_sub_plane2.y().end() / _y_subsampling[2], 1));
-
- unsigned int idx = 0;
-
- // Set inputs
- add_2D_tensor_argument(idx, _planes[0], slice);
- add_2D_tensor_argument(idx, _planes[1], win_sub_plane1);
- add_2D_tensor_argument(idx, _planes[2], win_sub_plane2);
- add_2D_tensor_argument_if((nullptr != _planes[3]), idx, _planes[3], slice);
-
- // Set outputs
- if(nullptr != _output) // Single planar output
- {
- add_2D_tensor_argument(idx, _output, slice);
- }
- else // Multi-planar output
- {
- // Reduce slice in case of subsampling to avoid out-of bounds access
- slice.set(Window::DimY, Window::Dimension(slice.y().start() / _y_subsampling[1], slice.y().end() / _y_subsampling[1], 1));
-
- add_2D_tensor_argument(idx, _output_multi->cl_plane(0), slice);
- add_2D_tensor_argument(idx, _output_multi->cl_plane(1), win_sub_plane1);
- add_2D_tensor_argument_if((3 == num_planes_from_format(_output_multi->info()->format())), idx, _output_multi->cl_plane(2), win_sub_plane2);
-
- _kernel.setArg(idx++, slice.y().end());
- }
-
- enqueue(queue, *this, slice, lws_hint());
- }
- while(window.slide_window_slice_2D(slice));
-}
-} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLChannelCombineKernel.h b/src/core/CL/kernels/CLChannelCombineKernel.h
deleted file mode 100644
index f19995aa8e..0000000000
--- a/src/core/CL/kernels/CLChannelCombineKernel.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLCHANNELCOMBINEKERNEL_H
-#define ARM_COMPUTE_CLCHANNELCOMBINEKERNEL_H
-
-#include "src/core/CL/ICLKernel.h"
-
-#include <array>
-#include <cstdint>
-
-namespace arm_compute
-{
-class ICLMultiImage;
-class ICLTensor;
-using ICLImage = ICLTensor;
-
-/** Interface for the channel combine kernel */
-class CLChannelCombineKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLChannelCombineKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLChannelCombineKernel(const CLChannelCombineKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLChannelCombineKernel &operator=(const CLChannelCombineKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLChannelCombineKernel(CLChannelCombineKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLChannelCombineKernel &operator=(CLChannelCombineKernel &&) = default;
- /** Default destructor */
- ~CLChannelCombineKernel() = default;
- /** Configure function's inputs and outputs.
- *
- * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format.
- * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format.
- * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format.
- * @param[in] plane3 The 2D plane that forms channel 3. Must be of U8 format.
- * @param[out] output The single planar output tensor. Supported formats: RGB888/RGBA8888/YUYV422/UYVY422.
- */
- void configure(const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output);
- /** Configure function's inputs and outputs.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format.
- * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format.
- * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format.
- * @param[in] plane3 The 2D plane that forms channel 3. Must be of U8 format.
- * @param[out] output The single planar output tensor.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output);
- /** Configure function's inputs and outputs.
- *
- * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format.
- * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format.
- * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format.
- * @param[out] output The multi planar output tensor. Supported formats: RGB888/RGBA8888/YUYV422/UYVY422.
- */
- void configure(const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output);
- /** Configure function's inputs and outputs.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format.
- * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format.
- * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format.
- * @param[out] output The multi planar output tensor. Supported formats: RGB888/RGBA8888/YUYV422/UYVY422.
- */
- void configure(const CLCompileContext &compile_context, const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- std::array<const ICLTensor *, 4> _planes;
- ICLTensor *_output;
- ICLMultiImage *_output_multi;
- std::array<uint32_t, 3> _x_subsampling;
- std::array<uint32_t, 3> _y_subsampling;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLCHANNELCOMBINEKERNEL_H */
diff --git a/src/core/CL/kernels/CLChannelExtractKernel.cpp b/src/core/CL/kernels/CLChannelExtractKernel.cpp
deleted file mode 100644
index cbf504b98b..0000000000
--- a/src/core/CL/kernels/CLChannelExtractKernel.cpp
+++ /dev/null
@@ -1,196 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/core/CL/kernels/CLChannelExtractKernel.h"
-
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLMultiImage.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Coordinates.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/MultiImageInfo.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-#include "src/core/helpers/AutoConfiguration.h"
-#include "src/core/helpers/WindowHelpers.h"
-
-#include <set>
-#include <string>
-
-using namespace arm_compute;
-
-CLChannelExtractKernel::CLChannelExtractKernel()
- : _input(nullptr), _output(nullptr), _num_elems_processed_per_iteration(8), _subsampling(1)
-{
-}
-
-void CLChannelExtractKernel::configure(const ICLTensor *input, Channel channel, ICLTensor *output)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, channel, output);
-}
-
-void CLChannelExtractKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, Channel channel, ICLTensor *output)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_ERROR_ON(input == output);
-
- set_format_if_unknown(*output->info(), Format::U8);
-
- // Check if input tensor has a valid format
- ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(input, Format::RGB888, Format::RGBA8888, Format::YUYV422, Format::UYVY422);
- ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(output, Format::U8);
- ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(output);
-
- // Check if channel is valid for given format
- const Format format = input->info()->format();
- ARM_COMPUTE_ERROR_ON_CHANNEL_NOT_IN_KNOWN_FORMAT(format, channel);
-
- // Half the processed elements for U,V channels due to sub-sampling of 2
- _subsampling = 1;
-
- if(format == Format::YUYV422 || format == Format::UYVY422)
- {
- // Check if the width of the tensor shape is even for formats with subsampled channels (UYVY422 and YUYV422)
- ARM_COMPUTE_ERROR_ON_TENSORS_NOT_EVEN(format, input);
-
- if(channel != Channel::Y)
- {
- _subsampling = 2;
- }
- }
-
- // Calculate output tensor shape using subsampling
- TensorShape output_shape = calculate_subsampled_shape(input->info()->tensor_shape(), format, channel);
- set_shape_if_empty(*output->info(), output_shape);
-
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape);
-
- _input = input;
- _output = output;
-
- // Create kernel
- std::string kernel_name = "channel_extract_" + string_from_format(format);
- std::set<std::string> build_opts = { ("-DCHANNEL_" + string_from_channel(channel)) };
- _kernel = create_kernel(compile_context, kernel_name, build_opts);
-
- // Configure window
- Window win = calculate_max_window(*input->info(), Steps(_num_elems_processed_per_iteration));
- AccessWindowHorizontal input_access(input->info(), 0, _num_elems_processed_per_iteration);
- AccessWindowRectangle output_access(output->info(), 0, 0, _num_elems_processed_per_iteration, 1, 1.f / _subsampling, 1.f / _subsampling);
-
- update_window_and_padding(win, input_access, output_access);
-
- ValidRegion input_valid_region = input->info()->valid_region();
- output_access.set_valid_region(win, ValidRegion(input_valid_region.anchor, output->info()->tensor_shape()));
-
- ICLKernel::configure_internal(win);
-}
-
-void CLChannelExtractKernel::configure(const ICLMultiImage *input, Channel channel, ICLImage *output)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, channel, output);
-}
-
-void CLChannelExtractKernel::configure(const CLCompileContext &compile_context, const ICLMultiImage *input, Channel channel, ICLImage *output)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(output);
-
- set_format_if_unknown(*output->info(), Format::U8);
-
- // Check if channel is valid for given format
- const Format format = input->info()->format();
- ARM_COMPUTE_ERROR_ON_CHANNEL_NOT_IN_KNOWN_FORMAT(format, channel);
-
- // Get input plane from the given channel
- const ICLImage *input_plane = input->cl_plane(plane_idx_from_channel(format, channel));
- ARM_COMPUTE_ERROR_ON_NULLPTR(input_plane);
-
- if(Channel::Y == channel && format != Format::YUV444)
- {
- // Check if the width of the tensor shape is even for formats with subsampled channels (UYVY422 and YUYV422)
- ARM_COMPUTE_ERROR_ON_TENSORS_NOT_EVEN(format, input_plane);
- }
-
- // Calculate 2x2 subsampled tensor shape
- TensorShape output_shape = calculate_subsampled_shape(input->cl_plane(0)->info()->tensor_shape(), format, channel);
- set_shape_if_empty(*output->info(), output_shape);
-
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output_shape, output->info()->tensor_shape());
-
- // Check if input tensor has a valid format
- ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(input, Format::NV12, Format::NV21, Format::IYUV, Format::YUV444);
- ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(output, Format::U8);
-
- _output = output;
- _input = input_plane;
- _subsampling = 1;
-
- // Create kernel
- std::string kernel_name;
- std::set<std::string> build_opts;
- if(Channel::Y == channel || Format::IYUV == format || Format::YUV444 == format)
- {
- kernel_name = "copy_plane";
- }
- else
- {
- kernel_name = "channel_extract_" + string_from_format(format);
- build_opts.insert(("-DCHANNEL_" + string_from_channel(channel)));
- }
- _kernel = create_kernel(compile_context, kernel_name, build_opts);
-
- // Configure window
- Window win = calculate_max_window(*input_plane->info(), Steps(_num_elems_processed_per_iteration));
- AccessWindowHorizontal input_access(input_plane->info(), 0, _num_elems_processed_per_iteration);
- AccessWindowHorizontal output_access(output->info(), 0, _num_elems_processed_per_iteration);
-
- update_window_and_padding(win, input_access, output_access);
-
- output_access.set_valid_region(win, input_plane->info()->valid_region());
-
- ICLKernel::configure_internal(win);
-}
-
-void CLChannelExtractKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- Window slice = window.first_slice_window_2D();
-
- do
- {
- Window win_sub(slice);
- win_sub.set(Window::DimX, Window::Dimension(win_sub.x().start() / _subsampling, win_sub.x().end() / _subsampling, win_sub.x().step() / _subsampling));
- win_sub.set(Window::DimY, Window::Dimension(win_sub.y().start() / _subsampling, win_sub.y().end() / _subsampling, 1));
-
- unsigned int idx = 0;
- add_2D_tensor_argument(idx, _input, slice);
- add_2D_tensor_argument(idx, _output, win_sub);
- enqueue(queue, *this, slice, lws_hint());
- }
- while(window.slide_window_slice_2D(slice));
-}
diff --git a/src/core/CL/kernels/CLChannelExtractKernel.h b/src/core/CL/kernels/CLChannelExtractKernel.h
deleted file mode 100644
index 37abde548c..0000000000
--- a/src/core/CL/kernels/CLChannelExtractKernel.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLCHANNELEXTRACTKERNEL_H
-#define ARM_COMPUTE_CLCHANNELEXTRACTKERNEL_H
-
-#include "arm_compute/core/Types.h"
-#include "src/core/CL/ICLKernel.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ICLMultiImage;
-class ICLTensor;
-using ICLImage = ICLTensor;
-
-/** Interface for the channel extract kernel */
-class CLChannelExtractKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLChannelExtractKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLChannelExtractKernel(const CLChannelExtractKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLChannelExtractKernel &operator=(const CLChannelExtractKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLChannelExtractKernel(CLChannelExtractKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLChannelExtractKernel &operator=(CLChannelExtractKernel &&) = default;
- /** Default destructor */
- ~CLChannelExtractKernel() = default;
- /** Set the input and output of the kernel
- *
- * @param[in] input Source tensor. Formats supported: RGB888/RGBA8888/YUYV422/UYVY422
- * @param[in] channel Channel to extract.
- * @param[out] output Destination tensor. Must be of U8 format.
- */
- void configure(const ICLTensor *input, Channel channel, ICLTensor *output);
- /** Set the input and output of the kernel
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Formats supported: RGB888/RGBA8888/YUYV422/UYVY422
- * @param[in] channel Channel to extract.
- * @param[out] output Destination tensor. Must be of U8 format.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, Channel channel, ICLTensor *output);
- /** Set the input and output of the kernel
- *
- * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV/YUV444
- * @param[in] channel Channel to extract.
- * @param[out] output Single-planar 2D destination image. Must be of U8 format.
- */
- void configure(const ICLMultiImage *input, Channel channel, ICLImage *output);
- /** Set the input and output of the kernel
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV/YUV444
- * @param[in] channel Channel to extract.
- * @param[out] output Single-planar 2D destination image. Must be of U8 format.
- */
- void configure(const CLCompileContext &compile_context, const ICLMultiImage *input, Channel channel, ICLImage *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- uint32_t _num_elems_processed_per_iteration;
- uint32_t _subsampling;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLCHANNELEXTRACTKERNEL_H */
diff --git a/src/core/CL/kernels/CLColorConvertKernel.cpp b/src/core/CL/kernels/CLColorConvertKernel.cpp
deleted file mode 100644
index 6c61fec997..0000000000
--- a/src/core/CL/kernels/CLColorConvertKernel.cpp
+++ /dev/null
@@ -1,558 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/core/CL/kernels/CLColorConvertKernel.h"
-
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLMultiImage.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/MultiImageInfo.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-#include "src/core/helpers/WindowHelpers.h"
-#include "support/StringSupport.h"
-
-#include <sstream>
-
-using namespace arm_compute;
-
-CLColorConvertKernel::CLColorConvertKernel()
- : _input(nullptr), _output(nullptr), _multi_input(nullptr), _multi_output(nullptr)
-{
-}
-
-void CLColorConvertKernel::configure(const ICLTensor *input, ICLTensor *output)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output);
-}
-
-void CLColorConvertKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
-{
- ARM_COMPUTE_ERROR_ON(input == nullptr);
- ARM_COMPUTE_ERROR_ON(output == nullptr);
-
- unsigned int num_elems_processed_per_iteration = 0;
- switch(input->info()->format())
- {
- case Format::RGBA8888:
- {
- switch(output->info()->format())
- {
- case Format::RGB888:
- num_elems_processed_per_iteration = 16;
- break;
- default:
- ARM_COMPUTE_ERROR("Not supported");
- break;
- }
- break;
- }
- case Format::UYVY422:
- case Format::YUYV422:
- {
- switch(output->info()->format())
- {
- case Format::RGB888:
- case Format::RGBA8888:
- num_elems_processed_per_iteration = 8;
- break;
- default:
- ARM_COMPUTE_ERROR("Not supported");
- break;
- }
- break;
- }
- case Format::RGB888:
- {
- switch(output->info()->format())
- {
- case Format::RGBA8888:
- case Format::U8:
- num_elems_processed_per_iteration = 16;
- break;
- default:
- ARM_COMPUTE_ERROR("Not supported");
- break;
- }
- break;
- }
- default:
- break;
- }
- ARM_COMPUTE_ERROR_ON_MSG_VAR(num_elems_processed_per_iteration == 0, "Conversion from %s to %s not supported",
- string_from_format(input->info()->format()).c_str(),
- string_from_format(output->info()->format()).c_str());
-
- std::stringstream kernel_name;
-
- kernel_name << string_from_format(input->info()->format());
- kernel_name << "_to_";
- kernel_name << string_from_format(output->info()->format());
- kernel_name << "_bt709";
-
- _input = input;
- _output = output;
-
- // Create kernel
- _kernel = create_kernel(compile_context, kernel_name.str());
-
- // Configure kernel window
- Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
- AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
-
- update_window_and_padding(win, input_access, output_access);
-
- output_access.set_valid_region(win, input->info()->valid_region());
-
- ICLKernel::configure_internal(win);
-
- // Set config_id for enabling LWS tuning
- _config_id = kernel_name.str();
- _config_id += "_";
- _config_id += lower_string(string_from_data_type(input->info()->data_type()));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(1));
-}
-
-void CLColorConvertKernel::configure(const ICLMultiImage *input, ICLImage *output)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output);
-}
-
-void CLColorConvertKernel::configure(const CLCompileContext &compile_context, const ICLMultiImage *input, ICLImage *output)
-{
- ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(output);
- ARM_COMPUTE_ERROR_ON(output == nullptr);
-
- unsigned int num_elems_processed_per_iteration = 0;
-
- switch(input->info()->format())
- {
- case Format::NV12:
- case Format::NV21:
- case Format::IYUV:
- {
- switch(output->info()->format())
- {
- case Format::RGB888:
- case Format::RGBA8888:
- num_elems_processed_per_iteration = 4;
- break;
- default:
- ARM_COMPUTE_ERROR("Not supported");
- break;
- }
- break;
- }
- default:
- break;
- }
- ARM_COMPUTE_ERROR_ON_MSG_VAR(num_elems_processed_per_iteration == 0, "Conversion from %s to %s not supported",
- string_from_format(input->info()->format()).c_str(),
- string_from_format(output->info()->format()).c_str());
-
- std::stringstream kernel_name;
-
- kernel_name << string_from_format(input->info()->format());
- kernel_name << "_to_";
- kernel_name << string_from_format(output->info()->format());
- kernel_name << "_bt709";
-
- _multi_input = input;
- _output = output;
-
- // Create kernel
- _kernel = create_kernel(compile_context, kernel_name.str());
-
- // Configure kernel window
- const bool has_two_planes = (input->info()->format() == Format::NV12) || (input->info()->format() == Format::NV21);
- const float sub_sampling = (has_two_planes || (input->info()->format() == Format::IYUV)) ? 0.5f : 1;
-
- Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration));
- win.set_dimension_step(Window::DimY, 2);
-
- AccessWindowHorizontal plane0_access(input->plane(0)->info(), 0, num_elems_processed_per_iteration);
- AccessWindowRectangle plane1_access(input->plane(1)->info(), 0, 0, num_elems_processed_per_iteration, 1,
- sub_sampling, sub_sampling);
- AccessWindowRectangle plane2_access(has_two_planes ? nullptr : input->plane(2)->info(), 0, 0, num_elems_processed_per_iteration, 1,
- sub_sampling, sub_sampling);
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
-
- update_window_and_padding(win,
- plane0_access, plane1_access, plane2_access,
- output_access);
-
- ValidRegion intersect_region = intersect_valid_regions(input->plane(0)->info()->valid_region(), input->plane(1)->info()->valid_region(),
- input->plane(2)->info()->valid_region());
- output_access.set_valid_region(win, ValidRegion(intersect_region.anchor, output->info()->tensor_shape()));
-
- ICLKernel::configure_internal(win);
-
- // Set config_id for enabling LWS tuning
- _config_id = kernel_name.str();
- _config_id += "_";
- _config_id += lower_string(string_from_data_type(input->plane(0)->info()->data_type()));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->plane(0)->info()->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->plane(0)->info()->dimension(1));
- _config_id += "_";
- _config_id += lower_string(string_from_data_type(input->plane(1)->info()->data_type()));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->plane(1)->info()->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->plane(1)->info()->dimension(1));
-}
-
-void CLColorConvertKernel::configure(const ICLImage *input, ICLMultiImage *output)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output);
-}
-
-void CLColorConvertKernel::configure(const CLCompileContext &compile_context, const ICLImage *input, ICLMultiImage *output)
-{
- ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input);
- ARM_COMPUTE_ERROR_ON(output == nullptr);
-
- unsigned int num_elems_processed_per_iteration = 0;
- unsigned int num_elems_read_per_iteration_x = 0;
-
- bool has_two_planes = (output->info()->format() == Format::NV12) || (output->info()->format() == Format::NV21);
- float sub_sampling = (has_two_planes || (output->info()->format() == Format::IYUV)) ? 0.5f : 1;
-
- switch(input->info()->format())
- {
- case Format::RGB888:
- case Format::RGBA8888:
- {
- switch(output->info()->format())
- {
- case Format::NV12:
- case Format::IYUV:
- num_elems_processed_per_iteration = 2;
- num_elems_read_per_iteration_x = 8;
- break;
- case Format::YUV444:
- num_elems_processed_per_iteration = 4;
- num_elems_read_per_iteration_x = 16;
- break;
- default:
- ARM_COMPUTE_ERROR("Not supported");
- break;
- }
- break;
- }
- case Format::UYVY422:
- case Format::YUYV422:
- {
- switch(output->info()->format())
- {
- case Format::NV12:
- case Format::IYUV:
- num_elems_processed_per_iteration = 8;
- num_elems_read_per_iteration_x = 8;
- break;
- default:
- ARM_COMPUTE_ERROR("Not supported");
- break;
- }
- break;
- }
- default:
- break;
- }
-
- ARM_COMPUTE_ERROR_ON_MSG_VAR(num_elems_processed_per_iteration == 0, "Conversion from %s to %s not supported",
- string_from_format(input->info()->format()).c_str(),
- string_from_format(output->info()->format()).c_str());
-
- std::stringstream kernel_name;
-
- kernel_name << string_from_format(input->info()->format());
- kernel_name << "_to_";
- kernel_name << string_from_format(output->info()->format());
- kernel_name << "_bt709";
- _input = input;
- _multi_output = output;
-
- // Create kernel
- _kernel = create_kernel(compile_context, kernel_name.str());
-
- // Configure kernel window
- Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
- if((input->info()->format() != Format::RGB888 || output->info()->format() != Format::YUV444) && (input->info()->format() != Format::RGBA8888 || output->info()->format() != Format::YUV444))
- {
- win.set_dimension_step(Window::DimY, 2);
- }
-
- AccessWindowHorizontal output_plane0_access(output->plane(0)->info(), 0, num_elems_processed_per_iteration);
- AccessWindowRectangle output_plane1_access(output->plane(1)->info(), 0, 0, num_elems_processed_per_iteration, 1, sub_sampling, sub_sampling);
- AccessWindowRectangle output_plane2_access(has_two_planes ? nullptr : output->plane(2)->info(), 0, 0,
- num_elems_processed_per_iteration, 1, sub_sampling, sub_sampling);
-
- AccessWindowHorizontal input_access(input->info(), 0, num_elems_read_per_iteration_x);
-
- update_window_and_padding(win,
- input_access,
- output_plane0_access,
- output_plane1_access,
- output_plane2_access);
-
- ValidRegion input_region = input->info()->valid_region();
-
- output_plane0_access.set_valid_region(win, ValidRegion(input_region.anchor, output->plane(0)->info()->tensor_shape()));
- output_plane1_access.set_valid_region(win, ValidRegion(input_region.anchor, output->plane(1)->info()->tensor_shape()));
- output_plane2_access.set_valid_region(win, ValidRegion(input_region.anchor, output->plane(2)->info()->tensor_shape()));
-
- ICLKernel::configure_internal(win);
-
- // Set config_id for enabling LWS tuning
- _config_id = kernel_name.str();
- _config_id += "_";
- _config_id += lower_string(string_from_data_type(input->info()->data_type()));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(1));
-}
-
-void CLColorConvertKernel::configure(const ICLMultiImage *input, ICLMultiImage *output)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output);
-}
-
-void CLColorConvertKernel::configure(const CLCompileContext &compile_context, const ICLMultiImage *input, ICLMultiImage *output)
-{
- unsigned int num_elems_processed_per_iteration = 0;
- switch(input->info()->format())
- {
- case Format::NV12:
- case Format::NV21:
- {
- switch(output->info()->format())
- {
- case Format::IYUV:
- case Format::YUV444:
- num_elems_processed_per_iteration = 16;
- break;
- default:
- ARM_COMPUTE_ERROR("Not supported");
- break;
- }
- break;
- }
- case Format::IYUV:
- {
- switch(output->info()->format())
- {
- case Format::YUV444:
- case Format::NV12:
- num_elems_processed_per_iteration = 16;
- break;
- default:
- ARM_COMPUTE_ERROR("Not supported");
- break;
- }
- break;
- }
- default:
- break;
- }
- ARM_COMPUTE_ERROR_ON_MSG_VAR(num_elems_processed_per_iteration == 0, "Conversion from %s to %s not supported",
- string_from_format(input->info()->format()).c_str(),
- string_from_format(output->info()->format()).c_str());
-
- std::stringstream kernel_name;
-
- kernel_name << string_from_format(input->info()->format());
- kernel_name << "_to_";
- kernel_name << string_from_format(output->info()->format());
- kernel_name << "_bt709";
-
- _multi_input = input;
- _multi_output = output;
-
- // Create kernel
- bool has_two_input_planars = (input->info()->format() == Format::NV12) || (input->info()->format() == Format::NV21);
- bool has_two_output_planars = (output->info()->format() == Format::NV12) || (output->info()->format() == Format::NV21);
-
- float sub_sampling_input = (has_two_input_planars || (input->info()->format() == Format::IYUV)) ? 0.5f : 1;
- float sub_sampling_output = (has_two_output_planars || (output->info()->format() == Format::IYUV)) ? 0.5f : 1;
-
- _kernel = create_kernel(compile_context, kernel_name.str());
-
- Window win = calculate_max_window(*input->cl_plane(0)->info(), Steps(num_elems_processed_per_iteration));
- win.set_dimension_step(Window::DimY, 2);
-
- AccessWindowHorizontal input_plane0_access(input->plane(0)->info(), 0, num_elems_processed_per_iteration);
- AccessWindowRectangle input_plane1_access(input->plane(1)->info(), 0, 0, num_elems_processed_per_iteration, 1,
- sub_sampling_input, sub_sampling_input);
- AccessWindowRectangle input_plane2_access(has_two_input_planars ? nullptr : input->plane(2)->info(), 0, 0, num_elems_processed_per_iteration, 1,
- sub_sampling_input, sub_sampling_input);
- AccessWindowHorizontal output_plane0_access(output->plane(0)->info(), 0, num_elems_processed_per_iteration);
- AccessWindowRectangle output_plane1_access(output->plane(1)->info(), 0, 0, num_elems_processed_per_iteration, 1, sub_sampling_output, sub_sampling_output);
- AccessWindowRectangle output_plane2_access(has_two_output_planars ? nullptr : output->plane(2)->info(), 0, 0,
- num_elems_processed_per_iteration, 1, sub_sampling_output, sub_sampling_output);
-
- update_window_and_padding(win,
- input_plane0_access, input_plane1_access, input_plane2_access,
- output_plane0_access, output_plane1_access, output_plane2_access);
-
- ValidRegion intersect_region = intersect_valid_regions(input->plane(0)->info()->valid_region(), input->plane(1)->info()->valid_region(),
- input->plane(2)->info()->valid_region());
- output_plane0_access.set_valid_region(win, ValidRegion(intersect_region.anchor, output->plane(0)->info()->tensor_shape()));
- output_plane1_access.set_valid_region(win, ValidRegion(intersect_region.anchor, output->plane(1)->info()->tensor_shape()));
- output_plane2_access.set_valid_region(win, ValidRegion(intersect_region.anchor, output->plane(2)->info()->tensor_shape()));
-
- ICLKernel::configure_internal(win);
-
- // Set config_id for enabling LWS tuning
- _config_id = kernel_name.str();
- _config_id += "_";
- _config_id += lower_string(string_from_data_type(input->plane(0)->info()->data_type()));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->plane(0)->info()->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->plane(0)->info()->dimension(1));
- _config_id += "_";
- _config_id += lower_string(string_from_data_type(input->plane(1)->info()->data_type()));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->plane(1)->info()->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->plane(1)->info()->dimension(1));
-}
-
-void CLColorConvertKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- Window slice = window.first_slice_window_2D();
-
- if(nullptr != _input && nullptr != _output)
- {
- do
- {
- unsigned int idx = 0;
- add_2D_tensor_argument(idx, _input, slice);
- add_2D_tensor_argument(idx, _output, slice);
- enqueue(queue, *this, slice, lws_hint());
- }
- while(window.slide_window_slice_2D(slice));
- }
- else if(nullptr != _input && nullptr != _multi_output)
- {
- Format format = _multi_output->info()->format();
- do
- {
- Window win_uv(slice);
-
- if((Format::NV12 == format) || (Format::NV21 == format) || (Format::IYUV == format))
- {
- win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
- win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
- }
- unsigned int idx = 0;
- add_2D_tensor_argument(idx, _input, slice);
- add_2D_tensor_argument(idx, _multi_output->cl_plane(0), slice);
- for(int i = 1; i < 3 && (0 != _multi_output->cl_plane(i)->info()->num_dimensions()); ++i)
- {
- add_2D_tensor_argument(idx, _multi_output->cl_plane(i), win_uv);
- }
- enqueue(queue, *this, slice, lws_hint());
- }
- while(window.slide_window_slice_2D(slice));
- }
- else if(nullptr != _multi_input && nullptr != _output)
- {
- Format format = _multi_input->info()->format();
- do
- {
- Window win_uv(slice);
-
- if((Format::NV12 == format) || (Format::NV21 == format) || (Format::IYUV == format))
- {
- win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
- win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
- }
-
- unsigned int idx = 0;
- add_2D_tensor_argument(idx, _multi_input->cl_plane(0), slice);
-
- for(int i = 1; i < 3 && (0 != _multi_input->cl_plane(i)->info()->num_dimensions()); ++i)
- {
- add_2D_tensor_argument(idx, _multi_input->cl_plane(i), win_uv);
- }
- add_2D_tensor_argument(idx, _output, slice);
- enqueue(queue, *this, slice, lws_hint());
- }
- while(window.slide_window_slice_2D(slice));
- }
- else if(nullptr != _multi_input && nullptr != _multi_output)
- {
- Format in_format = _multi_input->info()->format();
- Format out_format = _multi_output->info()->format();
- do
- {
- Window win_in_uv(slice);
- if((Format::NV12 == in_format) || (Format::NV21 == in_format) || (Format::IYUV == in_format))
- {
- win_in_uv.set(Window::DimX, Window::Dimension(win_in_uv.x().start() / 2,
- win_in_uv.x().end() / 2, win_in_uv.x().step() / 2));
- win_in_uv.set(Window::DimY, Window::Dimension(win_in_uv.y().start() / 2, win_in_uv.y().end() / 2, 1));
- }
- unsigned int idx = 0;
- add_2D_tensor_argument(idx, _multi_input->cl_plane(0), slice);
- for(int i = 1; i < 3 && (0 != _multi_input->cl_plane(i)->info()->num_dimensions()); ++i)
- {
- add_2D_tensor_argument(idx, _multi_input->cl_plane(i), win_in_uv);
- }
-
- Window win_out_uv(slice);
- if((Format::NV12 == out_format) || (Format::NV21 == out_format) || (Format::IYUV == out_format))
- {
- win_out_uv.set(Window::DimX, Window::Dimension(win_out_uv.x().start() / 2,
- win_out_uv.x().end() / 2, win_out_uv.x().step() / 2));
- win_out_uv.set(Window::DimY, Window::Dimension(win_out_uv.y().start() / 2, win_out_uv.y().end() / 2, 1));
- }
-
- add_2D_tensor_argument(idx, _multi_output->cl_plane(0), slice);
- for(int i = 1; i < 3 && (0 != _multi_output->cl_plane(i)->info()->num_dimensions()); ++i)
- {
- add_2D_tensor_argument(idx, _multi_output->cl_plane(i), win_out_uv);
- }
- enqueue(queue, *this, slice, lws_hint());
- }
- while(window.slide_window_slice_2D(slice));
- }
- else
- {
- ARM_COMPUTE_ERROR("Not supported");
- }
-}
diff --git a/src/core/CL/kernels/CLColorConvertKernel.h b/src/core/CL/kernels/CLColorConvertKernel.h
deleted file mode 100644
index 0f082914cd..0000000000
--- a/src/core/CL/kernels/CLColorConvertKernel.h
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLCOLORCONVERTKERNEL_H
-#define ARM_COMPUTE_CLCOLORCONVERTKERNEL_H
-
-#include "src/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLMultiImage;
-class ICLTensor;
-using ICLImage = ICLTensor;
-
-/** Interface for the color convert kernel.
- *
- */
-class CLColorConvertKernel : public ICLKernel
-{
-public:
- /** Default constructor. */
- CLColorConvertKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLColorConvertKernel(const CLColorConvertKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLColorConvertKernel &operator=(const CLColorConvertKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLColorConvertKernel(CLColorConvertKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLColorConvertKernel &operator=(CLColorConvertKernel &&) = default;
- /** Default destructor. */
- ~CLColorConvertKernel() = default;
-
- /** Set the input and output of the kernel
- *
- * @param[in] input Source tensor. Formats supported: RGBA8888/UYVY422/YUYV422/RGB888
- * @param[out] output Destination tensor. Formats supported: RGB888 (if the formats of @p input are RGBA8888/UYVY422/YUYV422),
- * RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888/),
- * U8 (if the formats of @p input is RGB888)
- */
- void configure(const ICLTensor *input, ICLTensor *output);
- /** Set the input and output of the kernel
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Formats supported: RGBA8888/UYVY422/YUYV422/RGB888
- * @param[out] output Destination tensor. Formats supported: RGB888 (if the formats of @p input are RGBA8888/UYVY422/YUYV422),
- * RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888/),
- * U8 (if the formats of @p input is RGB888)
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
- /** Set the input and output of the kernel
- *
- * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV
- * @param[out] output Single-planar destination image. Formats supported: RGB888/RGBA8888
- */
- void configure(const ICLMultiImage *input, ICLImage *output);
- /** Set the input and output of the kernel
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV
- * @param[out] output Single-planar destination image. Formats supported: RGB888/RGBA8888
- */
- void configure(const CLCompileContext &compile_context, const ICLMultiImage *input, ICLImage *output);
- /** Set the input and output of the kernel
- *
- * @param[in] input Single-planar source image. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422
- * @param[out] output Multi-planar destination image. Formats supported: NV12/IYUV/YUV444 (if the formats of @p input are RGB888/RGB8888)
- */
- void configure(const ICLImage *input, ICLMultiImage *output);
- /** Set the input and output of the kernel
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Single-planar source image. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422
- * @param[out] output Multi-planar destination image. Formats supported: NV12/IYUV/YUV444 (if the formats of @p input are RGB888/RGB8888)
- */
- void configure(const CLCompileContext &compile_context, const ICLImage *input, ICLMultiImage *output);
- /** Set the input and output of the kernel
- *
- * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV
- * @param[out] output Multi-planar destination image. Formats supported: YUV444/IYUV (if the formats of @p input are NV12/NV21)/NV12 (if the format of @p input is IYUV)
- */
- void configure(const ICLMultiImage *input, ICLMultiImage *output);
- /** Set the input and output of the kernel
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV
- * @param[out] output Multi-planar destination image. Formats supported: YUV444/IYUV (if the formats of @p input are NV12/NV21)/NV12 (if the format of @p input is IYUV)
- */
- void configure(const CLCompileContext &compile_context, const ICLMultiImage *input, ICLMultiImage *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input; /*pointer to single planar tensor input */
- ICLTensor *_output; /*pointer to single planar tensor output */
- const ICLMultiImage *_multi_input; /*pointer to multi-planar input */
- ICLMultiImage *_multi_output; /*pointer to multi-planar output */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLCOLORCONVERTKERNEL_H */
diff --git a/src/core/CL/kernels/CLConvolutionKernel.cpp b/src/core/CL/kernels/CLConvolutionKernel.cpp
deleted file mode 100644
index 21f1047cc6..0000000000
--- a/src/core/CL/kernels/CLConvolutionKernel.cpp
+++ /dev/null
@@ -1,392 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/core/CL/kernels/CLConvolutionKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-#include "src/core/CL/ICLKernel.h"
-#include "src/core/helpers/WindowHelpers.h"
-#include "support/StringSupport.h"
-
-#include <set>
-#include <sstream>
-#include <string>
-
-namespace arm_compute
-{
-namespace
-{
-constexpr unsigned int max_matrix_size = 81;
-} // namespace
-
-/****************************************************************************************\
- * Square Convolution *
-\****************************************************************************************/
-
-template <unsigned int matrix_size>
-BorderSize CLConvolutionKernel<matrix_size>::border_size() const
-{
- return BorderSize(matrix_size / 2);
-}
-
-template <unsigned int matrix_size>
-void CLConvolutionKernel<matrix_size>::configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output, conv, scale, border_undefined);
-}
-
-template <unsigned int matrix_size>
-void CLConvolutionKernel<matrix_size>::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16);
- ARM_COMPUTE_ERROR_ON(conv == nullptr);
-
- _input = input;
- _output = output;
-
- std::stringstream kernel_name;
- CLBuildOptions build_opts;
- kernel_name << "convolution" << matrix_size << "x" << matrix_size << "_static";
-
- if(scale == 0)
- {
- scale = calculate_matrix_scale(conv, matrix_size);
- }
-
- for(unsigned int i = 0; i < matrix_size * matrix_size; i++)
- {
- std::stringstream mat_str;
- mat_str << "-DMAT" << i << "=" << conv[i];
- build_opts.add_option(mat_str.str());
- }
-
- build_opts.add_option("-DSCALE=" + support::cpp11::to_string(scale));
-
- DataType data_type = data_type_for_convolution_matrix(conv, matrix_size * matrix_size);
- build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(data_type));
-
- std::stringstream out_type;
- out_type << "-DDATA_TYPE_OUT=" << get_cl_type_from_data_type(output->info()->data_type());
- build_opts.add_option(out_type.str());
-
- _kernel = create_kernel(compile_context, kernel_name.str(), build_opts.options());
-
- // Configure kernel window
- constexpr unsigned int num_elems_processed_per_iteration = 8;
- constexpr unsigned int num_elems_written_per_iteration = 8;
- constexpr unsigned int num_elems_read_per_iteration = 16;
- constexpr unsigned int num_rows_read_per_iteration = matrix_size;
-
- Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size());
-
- AccessWindowRectangle input_access(input->info(), -border_size().left, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration);
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration);
-
- update_window_and_padding(win, input_access, output_access);
-
- output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
-
- ICLKernel::configure_internal(win);
-}
-
-/****************************************************************************************\
- * Separable Convolution *
-\****************************************************************************************/
-template <unsigned int matrix_size>
-CLSeparableConvolutionHorKernel<matrix_size>::CLSeparableConvolutionHorKernel()
- : _border_size(0)
-{
-}
-
-template <unsigned int matrix_size>
-BorderSize CLSeparableConvolutionHorKernel<matrix_size>::border_size() const
-{
- return _border_size;
-}
-
-template <unsigned int matrix_size>
-void CLSeparableConvolutionHorKernel<matrix_size>::configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, bool border_undefined)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output, conv, border_undefined);
-}
-
-template <unsigned int matrix_size>
-void CLSeparableConvolutionHorKernel<matrix_size>::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, bool border_undefined)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U16, DataType::S16, DataType::S32);
-
- ARM_COMPUTE_ERROR_ON((matrix_size != 5) && (matrix_size != 7) && (matrix_size != 9));
-
- _input = input;
- _output = output;
- _border_size = BorderSize(border_undefined ? 0 : matrix_size / 2, matrix_size / 2);
-
- // Set build options
- std::set<std::string> build_opts;
-
- std::array<int16_t, matrix_size *matrix_size> mat = { 0 };
- memcpy(mat.data(), conv, matrix_size * sizeof(int16_t));
-
- for(unsigned int j = 0; j < matrix_size * matrix_size; j++)
- {
- build_opts.insert("-DMAT" + support::cpp11::to_string(j) + "=" + support::cpp11::to_string(mat[j]));
- }
-
- build_opts.insert("-DSCALE=0");
-
- build_opts.insert("-DDATA_TYPE=" + get_cl_type_from_data_type(output->info()->data_type()));
-
- // Create kernel
- const std::string kernel_name = "convolution_separable1x" + support::cpp11::to_string(matrix_size) + "_static";
- _kernel = create_kernel(compile_context, kernel_name, build_opts);
-
- // Configure kernel window
- constexpr unsigned int num_elems_processed_per_iteration = 8;
- constexpr unsigned int num_elems_read_per_iteration = 16;
- constexpr unsigned int num_elems_written_per_iteration = 8;
-
- Window win = calculate_max_window_horizontal(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size());
-
- AccessWindowHorizontal input_access(input->info(), -border_size().left, num_elems_read_per_iteration);
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration);
-
- update_window_and_padding(win, input_access, output_access);
-
- output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
-
- ICLKernel::configure_internal(win);
-
- // Set config_id for enabling LWS tuning
- _config_id = kernel_name;
- _config_id += "_";
- _config_id += lower_string(string_from_data_type(input->info()->data_type()));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(1));
- _config_id += "_";
- _config_id += support::cpp11::to_string(output->info()->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(output->info()->dimension(1));
- _config_id += "_";
- _config_id += support::cpp11::to_string(border_undefined);
-}
-
-template <unsigned int matrix_size>
-BorderSize CLSeparableConvolutionVertKernel<matrix_size>::border_size() const
-{
- return BorderSize{ matrix_size / 2, 0 };
-}
-
-template <unsigned int matrix_size>
-void CLSeparableConvolutionVertKernel<matrix_size>::configure(const ICLTensor *input, ICLTensor *output,
- const int16_t *conv, uint32_t scale, bool border_undefined, DataType data_type)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output, conv, scale, border_undefined, data_type);
-}
-
-template <unsigned int matrix_size>
-void CLSeparableConvolutionVertKernel<matrix_size>::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output,
- const int16_t *conv, uint32_t scale, bool border_undefined, DataType data_type)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U16, DataType::S16, DataType::S32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16);
- ARM_COMPUTE_ERROR_ON((matrix_size != 5) && (matrix_size != 7) && (matrix_size != 9));
- ARM_COMPUTE_ERROR_ON(scale == 0);
-
- _input = input;
- _output = output;
-
- std::set<std::string> build_opts;
-
- std::array<int16_t, matrix_size *matrix_size> mat = { 0 };
- memcpy(mat.data() + matrix_size, conv, matrix_size * sizeof(int16_t));
-
- for(unsigned int j = 0; j < matrix_size * matrix_size; j++)
- {
- build_opts.insert("-DMAT" + support::cpp11::to_string(j) + "=" + support::cpp11::to_string(mat[j]));
- }
-
- build_opts.insert("-DSCALE=" + support::cpp11::to_string(scale));
-
- build_opts.insert("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
-
- build_opts.insert("-DCOMPUTE_TYPE=" + get_cl_type_from_data_type(data_type));
-
- std::stringstream out_type;
- out_type << "-DDATA_TYPE_OUT=" << get_cl_type_from_data_type(output->info()->data_type());
- build_opts.insert(out_type.str());
-
- // Create kernel
- const std::string kernel_name = "convolution_separable" + support::cpp11::to_string(matrix_size) + "x1_static";
- _kernel = create_kernel(compile_context, kernel_name, build_opts);
-
- // Configure kernel window
- constexpr unsigned int num_elems_processed_per_iteration = 8;
- constexpr unsigned int num_elems_written_per_iteration = 8;
- constexpr unsigned int num_elems_read_per_iteration = 8;
- constexpr unsigned int num_rows_read_per_iteration = matrix_size;
-
- Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size());
-
- AccessWindowRectangle input_access(input->info(), 0, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration);
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration);
-
- update_window_and_padding(win, input_access, output_access);
-
- output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
-
- ICLKernel::configure_internal(win);
-
- // Set config_id for enabling LWS tuning
- _config_id = kernel_name;
- _config_id += "_";
- _config_id += lower_string(string_from_data_type(data_type));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(1));
- _config_id += "_";
- _config_id += support::cpp11::to_string(output->info()->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(output->info()->dimension(1));
- _config_id += "_";
- _config_id += support::cpp11::to_string(border_undefined);
-}
-
-/****************************************************************************************\
- * Rectangle Convolution *
-\****************************************************************************************/
-
-CLConvolutionRectangleKernel::CLConvolutionRectangleKernel()
- : _border_size(0), _input(nullptr), _output(nullptr)
-{
-}
-
-BorderSize CLConvolutionRectangleKernel::border_size() const
-{
- return _border_size;
-}
-
-void CLConvolutionRectangleKernel::configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output, conv, width, height, scale, border_undefined);
-}
-
-void CLConvolutionRectangleKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale,
- bool border_undefined)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16);
- ARM_COMPUTE_ERROR_ON(nullptr == conv);
- ARM_COMPUTE_ERROR_ON(3 != width && 5 != width && 7 != width && 9 != width);
- ARM_COMPUTE_ERROR_ON(3 != height && 5 != height && 7 != height && 9 != height);
- ARM_COMPUTE_ERROR_ON(0 == scale);
-
- _input = input;
- _output = output;
- _border_size = BorderSize(height / 2, width / 2);
-
- std::set<std::string> options;
-
- std::stringstream output_type;
- output_type << "-DDATA_TYPE_OUT=" << get_cl_type_from_data_type(output->info()->data_type());
- options.insert(output_type.str());
-
- uint32_t matrix_size = width * height;
-
- std::array<int16_t, max_matrix_size> mat = { 0 };
-
- memcpy(mat.data(), conv, matrix_size * sizeof(int16_t));
-
- for(unsigned int j = 0; j < max_matrix_size; j++)
- {
- options.insert("-DMAT" + support::cpp11::to_string(j) + "=" + support::cpp11::to_string(mat[j]));
- }
-
- options.insert("-DSCALE=" + support::cpp11::to_string(scale));
-
- DataType data_type = data_type_for_convolution_matrix(conv, matrix_size);
- options.insert("-DDATA_TYPE=" + get_cl_type_from_data_type(data_type));
-
- options.insert("-DMATRIX_WIDTH=" + support::cpp11::to_string(width));
- options.insert("-DMATRIX_HEIGHT=" + support::cpp11::to_string(height));
-
- _kernel = create_kernel(compile_context, "convolution_rectangle", options);
-
- // Configure kernel window
- constexpr unsigned int num_elems_processed_per_iteration = 8;
- constexpr unsigned int num_elems_read_per_iteration = 16;
- constexpr unsigned int num_elems_written_per_iteration = 8;
- const unsigned int num_rows_read_per_iteration = height;
-
- Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size());
-
- AccessWindowRectangle input_access(input->info(), -border_size().left, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration);
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration);
-
- update_window_and_padding(win, input_access, output_access);
-
- output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
-
- ICLKernel::configure_internal(win);
-}
-
-void CLConvolutionRectangleKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
-
- Window slice = window.first_slice_window_2D();
-
- do
- {
- unsigned int idx = 0;
- add_2D_tensor_argument(idx, _input, slice);
- add_2D_tensor_argument(idx, _output, slice);
- enqueue(queue, *this, slice, lws_hint());
- }
- while(window.slide_window_slice_2D(slice));
-}
-
-template class arm_compute::CLConvolutionKernel<3>;
-template class arm_compute::CLConvolutionKernel<5>;
-template class arm_compute::CLConvolutionKernel<7>;
-template class arm_compute::CLConvolutionKernel<9>;
-template class arm_compute::CLSeparableConvolutionVertKernel<5>;
-template class arm_compute::CLSeparableConvolutionVertKernel<7>;
-template class arm_compute::CLSeparableConvolutionVertKernel<9>;
-template class arm_compute::CLSeparableConvolutionHorKernel<5>;
-template class arm_compute::CLSeparableConvolutionHorKernel<7>;
-template class arm_compute::CLSeparableConvolutionHorKernel<9>;
-} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLConvolutionKernel.h b/src/core/CL/kernels/CLConvolutionKernel.h
deleted file mode 100644
index 33e73caf11..0000000000
--- a/src/core/CL/kernels/CLConvolutionKernel.h
+++ /dev/null
@@ -1,224 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLCONVOLUTIONKERNEL_H
-#define ARM_COMPUTE_CLCONVOLUTIONKERNEL_H
-
-#include "src/core/CL/ICLSimple2DKernel.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/****************************************************************************************\
- * Square Convolution *
-\****************************************************************************************/
-
-/** Interface for the kernel to run an arbitrary size convolution on a tensor. (Currently supports 3x3, 5x5, 7x7 and 9x9).
- * The client can supply a convolution matrix \f$ C_{m,n} \f$.
- * @f{eqnarray}{
- * k_0 &=& \frac{m}{2} \\
- * l_0 &=& \frac{n}{2} \\
- * sum &=& \sum_{k=0,l=0}^{k=m-1,l=n-1} input(x+k-k_0, y+l-l_0) C_{k,l}
- * @f}
- *
- * @note The above equation for this function is similar to the default OpenCV Filter2D function,
- * which actually computes a correlation and not a convolution.
- * In case of a real convolution the convolution matrix should be flipped both horizontally and vertically.
- */
-template <unsigned int matrix_size>
-class CLConvolutionKernel : public ICLSimple2DKernel
-{
-public:
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor, Data types supported: U8, S16.
- * @param[in] conv Convolution matrix to apply to the input tensor.
- * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined);
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor, Data types supported: U8, S16.
- * @param[in] conv Convolution matrix to apply to the input tensor.
- * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined);
-
- // Inherited methods overridden:
- BorderSize border_size() const override;
-};
-
-/** Interface for the kernel which applies a 3x3 convolution to a tensor. */
-using CLConvolution3x3Kernel = CLConvolutionKernel<3>;
-/** Interface for the kernel which applies a 5x5 convolution to a tensor. */
-using CLConvolution5x5Kernel = CLConvolutionKernel<5>;
-/** Interface for the kernel which applies a 7x7 convolution to a tensor. */
-using CLConvolution7x7Kernel = CLConvolutionKernel<7>;
-/** Interface for the kernel which applies a 9x9 convolution to a tensor. */
-using CLConvolution9x9Kernel = CLConvolutionKernel<9>;
-
-/****************************************************************************************\
- * Separable Square Convolution *
-\****************************************************************************************/
-
-/** Kernel for the Horizontal pass of a Separable Convolution. Currently support 5x5, 7x7, 9x9 */
-template <unsigned int matrix_size>
-class CLSeparableConvolutionHorKernel : public ICLSimple2DKernel
-{
-public:
- /** Default Constructor */
- CLSeparableConvolutionHorKernel();
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor, Data types supported: S16.
- * @param[in] conv Convolution matrix to apply to the input tensor.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, bool border_undefined);
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor, Data types supported: U16/S16/S32.
- * @param[in] conv Convolution matrix to apply to the input tensor.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, bool border_undefined);
-
- // Inherited methods overridden:
- BorderSize border_size() const override;
-
-private:
- BorderSize _border_size; /**< Border size */
-};
-
-/** Interface for the kernel which applies a horizontal pass of 5x5 convolution to a tensor. */
-using CLSeparableConvolution5x5HorKernel = CLSeparableConvolutionHorKernel<5>;
-/** Interface for the kernel which applies a horizontal pass of 7x7 convolution to a tensor. */
-using CLSeparableConvolution7x7HorKernel = CLSeparableConvolutionHorKernel<7>;
-/** Interface for the kernel which applies a horizontal pass of 9x9 convolution to a tensor. */
-using CLSeparableConvolution9x9HorKernel = CLSeparableConvolutionHorKernel<9>;
-
-/** Kernel for the Vertical pass of a Separable Convolution. Currently supports 5x5, 7x7, 9x9 */
-template <unsigned int matrix_size>
-class CLSeparableConvolutionVertKernel : public ICLSimple2DKernel
-{
-public:
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] input Source tensor. Data types supported: U16/S16/S32.
- * @param[out] output Destination tensor, Data types supported: U8, S16.
- * @param[in] conv Convolution matrix to apply to the input tensor.
- * @param[in] scale Scale of the convolution matrix.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- * @param[in] data_type Data type to use for intermeidate result. @sa data_type_for_convolution
- */
- void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined, DataType data_type = DataType::S32);
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U16/S16/S32.
- * @param[out] output Destination tensor, Data types supported: U8, S16.
- * @param[in] conv Convolution matrix to apply to the input tensor.
- * @param[in] scale Scale of the convolution matrix.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- * @param[in] data_type Data type to use for intermeidate result. @sa data_type_for_convolution
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined, DataType data_type = DataType::S32);
-
- // Inherited methods overridden:
- BorderSize border_size() const override;
-};
-
-/** Interface for the kernel which applies a vertical pass of 5x5 convolution to a tensor. */
-using CLSeparableConvolution5x5VertKernel = CLSeparableConvolutionVertKernel<5>;
-/** Interface for the kernel which applies a vertical pass of 7x7 convolution to a tensor. */
-using CLSeparableConvolution7x7VertKernel = CLSeparableConvolutionVertKernel<7>;
-/** Interface for the kernel which applies a vertical pass of 9x9 convolution to a tensor. */
-using CLSeparableConvolution9x9VertKernel = CLSeparableConvolutionVertKernel<9>;
-
-/****************************************************************************************\
- * Rectangle Convolution *
-\****************************************************************************************/
-
-/** Kernel for the running convolution on a rectangle matrix.
- *
- * @note Supports combinations of 3,5,7 and 9.
- */
-class CLConvolutionRectangleKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLConvolutionRectangleKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLConvolutionRectangleKernel(const CLConvolutionRectangleKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLConvolutionRectangleKernel &operator=(const CLConvolutionRectangleKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLConvolutionRectangleKernel(CLConvolutionRectangleKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLConvolutionRectangleKernel &operator=(CLConvolutionRectangleKernel &&) = default;
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor, Data types supported: U8, S16.
- * @param[in] conv Convolution matrix to apply to the input tensor.
- * @param[in] width Width of convolution matrix (Number of columns)
- * @param[in] height Height of convolution matrix (Number of rows)
- * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined);
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor, Data types supported: U8, S16.
- * @param[in] conv Convolution matrix to apply to the input tensor.
- * @param[in] width Width of convolution matrix (Number of columns)
- * @param[in] height Height of convolution matrix (Number of rows)
- * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- BorderSize _border_size;
- const ICLTensor *_input;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLCONVOLUTIONKERNEL_H */
diff --git a/src/core/CL/kernels/CLDerivativeKernel.cpp b/src/core/CL/kernels/CLDerivativeKernel.cpp
deleted file mode 100644
index 5ff11362cc..0000000000
--- a/src/core/CL/kernels/CLDerivativeKernel.cpp
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/core/CL/kernels/CLDerivativeKernel.h"
-
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Validate.h"
-#include "src/core/helpers/WindowHelpers.h"
-#include "support/StringSupport.h"
-
-#include <set>
-#include <string>
-
-using namespace arm_compute;
-
-CLDerivativeKernel::CLDerivativeKernel()
- : _input(nullptr), _output_x(nullptr), _output_y(nullptr), _run_derivative_x(false), _run_derivative_y(false)
-{
-}
-
-BorderSize CLDerivativeKernel::border_size() const
-{
- return BorderSize(1);
-}
-
-void CLDerivativeKernel::configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output_x, output_y, border_undefined);
-}
-
-void CLDerivativeKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON((output_x == nullptr) && (output_y == nullptr));
-
- _run_derivative_x = output_x != nullptr;
- _run_derivative_y = output_y != nullptr;
-
- if(_run_derivative_x)
- {
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_x, 1, DataType::S16);
- }
-
- if(_run_derivative_y)
- {
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_y, 1, DataType::S16);
- }
-
- _input = input;
- _output_x = output_x;
- _output_y = output_y;
-
- // Set build options
- std::set<std::string> build_opts;
-
- if(_run_derivative_x)
- {
- build_opts.insert("-DGRAD_X");
- }
-
- if(_run_derivative_y)
- {
- build_opts.insert("-DGRAD_Y");
- }
-
- // Create kernel
- const std::string kernel_name = std::string("derivative");
- _kernel = create_kernel(compile_context, kernel_name, build_opts);
-
- // Configure kernel window
- constexpr unsigned int num_elems_processed_per_iteration = 16;
- constexpr unsigned int num_read_rows_per_iteration = 3;
-
- Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size());
-
- AccessWindowRectangle input_access(input->info(), 0, 0, 0, 0);
- AccessWindowHorizontal output_x_access(output_x == nullptr ? nullptr : output_x->info(), 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal output_y_access(output_y == nullptr ? nullptr : output_y->info(), 0, num_elems_processed_per_iteration);
- if(_run_derivative_x && _run_derivative_y)
- {
- // TODO(COMPMID-415) Fix x-access input bug in CL kernel instead of '+2'
- input_access = AccessWindowRectangle(input->info(), -border_size().left, -border_size().top, num_elems_processed_per_iteration + 2, num_read_rows_per_iteration);
- }
- else if(_run_derivative_x)
- {
- // TODO(COMPMID-415) Fix x-access input bug in CL kernel instead of '+2'
- input_access = AccessWindowHorizontal(input->info(), -border_size().left, num_elems_processed_per_iteration + 2);
- }
- else if(_run_derivative_y)
- {
- input_access = AccessWindowRectangle(input->info(), 0, -border_size().top, num_elems_processed_per_iteration, num_read_rows_per_iteration);
- }
-
- update_window_and_padding(win,
- input_access,
- output_x_access,
- output_y_access);
-
- output_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
- output_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
-
- ICLKernel::configure_internal(win);
-
- // Set config_id for enabling LWS tuning
- _config_id = kernel_name;
- _config_id += "_";
- _config_id += lower_string(string_from_data_type(input->info()->data_type()));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(1));
- _config_id += "_";
- _config_id += support::cpp11::to_string(border_undefined);
-}
-
-void CLDerivativeKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- Window slice = window.first_slice_window_2D();
- do
- {
- unsigned int idx = 0;
- add_2D_tensor_argument(idx, _input, slice);
- add_2D_tensor_argument_if((_run_derivative_x), idx, _output_x, slice);
- add_2D_tensor_argument_if((_run_derivative_y), idx, _output_y, slice);
-
- enqueue(queue, *this, slice, lws_hint());
- }
- while(window.slide_window_slice_2D(slice));
-}
diff --git a/src/core/CL/kernels/CLDerivativeKernel.h b/src/core/CL/kernels/CLDerivativeKernel.h
deleted file mode 100644
index 14dd05d084..0000000000
--- a/src/core/CL/kernels/CLDerivativeKernel.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLDERIVATIVEKERNEL_H
-#define ARM_COMPUTE_CLDERIVATIVEKERNEL_H
-
-#include "src/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the derivative kernel. */
-class CLDerivativeKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLDerivativeKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDerivativeKernel(const CLDerivativeKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDerivativeKernel &operator=(const CLDerivativeKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLDerivativeKernel(CLDerivativeKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLDerivativeKernel &operator=(CLDerivativeKernel &&) = default;
- /** Default destructor */
- ~CLDerivativeKernel() = default;
- /** Initialise the kernel's sources, destination and border
- *
- * @note At least one of output_x or output_y must be set
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
- /** Initialise the kernel's sources, destination and border
- *
- * @note At least one of output_x or output_y must be set
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- const ICLTensor *_input; /**< Input tensor */
- ICLTensor *_output_x; /**< Output tensor - Derivate along the X direction */
- ICLTensor *_output_y; /**< Output tensor - Derivate along the Y direction */
- bool _run_derivative_x; /**< Do we need to run Derivative X ? */
- bool _run_derivative_y; /**< Do we need to run Derivative Y ? */
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLDERIVATIVEKERNEL_H */
diff --git a/src/core/CL/kernels/CLDilateKernel.cpp b/src/core/CL/kernels/CLDilateKernel.cpp
deleted file mode 100644
index cac5bc1c72..0000000000
--- a/src/core/CL/kernels/CLDilateKernel.cpp
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/core/CL/kernels/CLDilateKernel.h"
-
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Validate.h"
-#include "src/core/helpers/WindowHelpers.h"
-
-using namespace arm_compute;
-
-BorderSize CLDilateKernel::border_size() const
-{
- return BorderSize(1);
-}
-
-void CLDilateKernel::configure(const ICLTensor *input, ICLTensor *output, bool border_undefined)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output, border_undefined);
-}
-
-void CLDilateKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
-
- // Create kernel
- _kernel = create_kernel(compile_context, "dilate");
-
- _input = input;
- _output = output;
-
- // Configure kernel window
- constexpr unsigned int num_elems_processed_per_iteration = 8;
- constexpr unsigned int num_elems_read_per_iteration = 16;
- constexpr unsigned int num_rows_read_per_iteration = 3;
-
- Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size());
-
- AccessWindowRectangle input_access(input->info(), -border_size().left, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration);
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
-
- update_window_and_padding(win, input_access, output_access);
-
- output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
-
- ICLKernel::configure_internal(win);
-}
diff --git a/src/core/CL/kernels/CLDilateKernel.h b/src/core/CL/kernels/CLDilateKernel.h
deleted file mode 100644
index 591ec8ccfc..0000000000
--- a/src/core/CL/kernels/CLDilateKernel.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLDILATEKERNEL_H
-#define ARM_COMPUTE_CLDILATEKERNEL_H
-
-#include "src/core/CL/ICLSimple2DKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the dilate kernel.
- *
- */
-class CLDilateKernel : public ICLSimple2DKernel
-{
-public:
- /**Initialise the kernel's input and output.
- *
- * @param[in] input An input tensor. Data types supported: U8
- * @param[out] output The output tensor. Data types supported: U8.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
- /**Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input An input tensor. Data types supported: U8
- * @param[out] output The output tensor. Data types supported: U8.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined);
-
- // Inherited methods overridden:
- BorderSize border_size() const override;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLDILATEKERNEL_H */
diff --git a/src/core/CL/kernels/CLErodeKernel.cpp b/src/core/CL/kernels/CLErodeKernel.cpp
deleted file mode 100644
index f6d98a5488..0000000000
--- a/src/core/CL/kernels/CLErodeKernel.cpp
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/core/CL/kernels/CLErodeKernel.h"
-
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Validate.h"
-#include "src/core/helpers/WindowHelpers.h"
-
-using namespace arm_compute;
-
-BorderSize CLErodeKernel::border_size() const
-{
- return BorderSize(1);
-}
-
-void CLErodeKernel::configure(const ICLTensor *input, ICLTensor *output, bool border_undefined)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output, border_undefined);
-}
-
-void CLErodeKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
-
- // Create kernel
- _kernel = create_kernel(compile_context, "erode");
-
- _input = input;
- _output = output;
-
- // Configure kernel window
- constexpr unsigned int num_elems_processed_per_iteration = 8;
- constexpr unsigned int num_elems_read_per_iteration = 16;
- constexpr unsigned int num_rows_read_pes_iteration = 3;
-
- Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size());
-
- AccessWindowRectangle input_access(input->info(), -border_size().left, -border_size().top, num_elems_read_per_iteration, num_rows_read_pes_iteration);
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
-
- update_window_and_padding(win, input_access, output_access);
-
- output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
-
- ICLKernel::configure_internal(win);
-}
diff --git a/src/core/CL/kernels/CLErodeKernel.h b/src/core/CL/kernels/CLErodeKernel.h
deleted file mode 100644
index 4da97ae358..0000000000
--- a/src/core/CL/kernels/CLErodeKernel.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLERODEKERNEL_H
-#define ARM_COMPUTE_CLERODEKERNEL_H
-
-#include "src/core/CL/ICLSimple2DKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the erode kernel.
- *
- */
-class CLErodeKernel : public ICLSimple2DKernel
-{
-public:
- /**Initialise the kernel's input and output.
- *
- * @param[in] input An input tensor. Data types supported: U8
- * @param[out] output The output tensor. Data types supported: U8.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
- /**Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input An input tensor. Data types supported: U8
- * @param[out] output The output tensor. Data types supported: U8.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined);
-
- // Inherited methods overridden:
- BorderSize border_size() const override;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLERODEKERNEL_H */
diff --git a/src/core/CL/kernels/CLFastCornersKernel.cpp b/src/core/CL/kernels/CLFastCornersKernel.cpp
deleted file mode 100644
index 7481fd1c27..0000000000
--- a/src/core/CL/kernels/CLFastCornersKernel.cpp
+++ /dev/null
@@ -1,209 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/core/CL/kernels/CLFastCornersKernel.h"
-
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Validate.h"
-#include "src/core/helpers/WindowHelpers.h"
-#include "support/StringSupport.h"
-
-#include <set>
-#include <string>
-
-using namespace arm_compute;
-
-CLFastCornersKernel::CLFastCornersKernel()
- : ICLKernel(), _input(nullptr), _output(nullptr)
-{
-}
-
-BorderSize CLFastCornersKernel::border_size() const
-{
- return BorderSize(3);
-}
-
-void CLFastCornersKernel::configure(const ICLImage *input, ICLImage *output, float threshold, bool non_max_suppression, BorderMode border_mode)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output, threshold, non_max_suppression, border_mode);
-}
-
-void CLFastCornersKernel::configure(const CLCompileContext &compile_context, const ICLImage *input, ICLImage *output, float threshold, bool non_max_suppression, BorderMode border_mode)
-{
- ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input);
- ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(output);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON_MSG(border_mode != BorderMode::UNDEFINED, "Not implemented");
-
- _input = input;
- _output = output;
-
- // Create build options
- std::set<std::string> build_opts;
-
- if(non_max_suppression)
- {
- build_opts.emplace("-DUSE_MAXSUPPRESSION");
- }
-
- // Create kernel
- const std::string kernel_name = std::string("fast_corners");
- _kernel = create_kernel(compile_context, kernel_name, build_opts);
-
- // Set static kernel arguments
- unsigned int idx = 2 * num_arguments_per_2D_tensor(); // Skip the input and output parameters
- _kernel.setArg<cl_float>(idx, static_cast<float>(threshold));
-
- // Configure kernel window
- constexpr unsigned int num_elems_processed_per_iteration = 1;
- constexpr unsigned int num_elems_read_per_iteration = 7;
- constexpr unsigned int num_rows_read_per_iteration = 3;
-
- Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_mode == BorderMode::UNDEFINED, BorderSize(3));
-
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
- AccessWindowRectangle input_access(input->info(), -border_size().left, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration);
-
- update_window_and_padding(win, input_access, output_access);
-
- output_access.set_valid_region(win, input->info()->valid_region(), border_mode == BorderMode::UNDEFINED, border_size());
-
- ICLKernel::configure_internal(win);
-
- // Set config_id for enabling LWS tuning
- _config_id = kernel_name;
- _config_id += "_";
- _config_id += lower_string(string_from_data_type(input->info()->data_type()));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(1));
- _config_id += "_";
- _config_id += support::cpp11::to_string(output->info()->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(output->info()->dimension(1));
- _config_id += "_";
- _config_id += support::cpp11::to_string(non_max_suppression);
- _config_id += "_";
- _config_id += lower_string(string_from_border_mode(border_mode));
-}
-
-void CLFastCornersKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- Window slice = window.first_slice_window_2D();
-
- do
- {
- unsigned int idx = 0;
- add_2D_tensor_argument(idx, _input, slice);
- add_2D_tensor_argument(idx, _output, slice);
- enqueue(queue, *this, slice, lws_hint());
- }
- while(window.slide_window_slice_2D(slice));
-}
-
-CLCopyToArrayKernel::CLCopyToArrayKernel()
- : ICLKernel(), _input(nullptr), _corners(nullptr), _num_buffer(nullptr)
-{
-}
-
-void CLCopyToArrayKernel::configure(const ICLImage *input, bool update_number, ICLKeyPointArray *corners, cl::Buffer *num_buffers)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, update_number, corners, num_buffers);
-}
-
-void CLCopyToArrayKernel::configure(const CLCompileContext &compile_context, const ICLImage *input, bool update_number, ICLKeyPointArray *corners, cl::Buffer *num_buffers)
-{
- ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON(corners == nullptr);
- ARM_COMPUTE_ERROR_ON(num_buffers == nullptr);
-
- _input = input;
- _corners = corners;
- _num_buffer = num_buffers;
-
- std::set<std::string> build_opts;
-
- if(update_number)
- {
- build_opts.emplace("-DUPDATE_NUMBER");
- }
-
- // Create kernel
- const std::string kernel_name = std::string("copy_to_keypoint");
- _kernel = create_kernel(compile_context, kernel_name, build_opts);
-
- //Get how many pixels skipped in the x dimension in the previous stages
- unsigned int offset = _input->info()->valid_region().anchor.x();
-
- // Set static kernel arguments
- unsigned int idx = num_arguments_per_2D_tensor(); // Skip the input and output parameters
- _kernel.setArg<unsigned int>(idx++, _corners->max_num_values());
- _kernel.setArg<cl_uint>(idx++, offset);
- _kernel.setArg(idx++, *_num_buffer);
- _kernel.setArg(idx++, _corners->cl_buffer());
-
- // Configure kernel window
- constexpr unsigned int num_elems_processed_per_iteration = 1;
- Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
- update_window_and_padding(win,
- AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration));
- ICLKernel::configure_internal(win);
-
- // Set config_id for enabling LWS tuning
- _config_id = kernel_name;
- _config_id += "_";
- _config_id += lower_string(string_from_data_type(input->info()->data_type()));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(1));
-}
-
-void CLCopyToArrayKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- //Initialise the _num_buffer as it used as both input and output
- static const unsigned int zero_init = 0;
- queue.enqueueWriteBuffer(*_num_buffer, CL_FALSE, 0, sizeof(unsigned int), &zero_init);
-
- Window slice = window.first_slice_window_2D();
-
- do
- {
- unsigned int idx = 0;
- add_2D_tensor_argument(idx, _input, slice);
- enqueue(queue, *this, slice, lws_hint());
- }
- while(window.slide_window_slice_2D(slice));
-}
diff --git a/src/core/CL/kernels/CLFastCornersKernel.h b/src/core/CL/kernels/CLFastCornersKernel.h
deleted file mode 100644
index 0c1b564c2f..0000000000
--- a/src/core/CL/kernels/CLFastCornersKernel.h
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLFASTCORNERSKERNEL_H
-#define ARM_COMPUTE_CLFASTCORNERSKERNEL_H
-
-#include "arm_compute/core/CL/ICLArray.h"
-#include "arm_compute/core/Types.h"
-#include "src/core/CL/ICLKernel.h"
-
-#include <cstdint>
-
-namespace cl
-{
-class Buffer;
-}
-
-namespace arm_compute
-{
-class ICLTensor;
-using ICLImage = ICLTensor;
-
-/** CL kernel to perform fast corners */
-class CLFastCornersKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLFastCornersKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLFastCornersKernel(const CLFastCornersKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLFastCornersKernel &operator=(const CLFastCornersKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLFastCornersKernel(CLFastCornersKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLFastCornersKernel &operator=(CLFastCornersKernel &&) = default;
- /** Default destructor */
- ~CLFastCornersKernel() = default;
-
- /** Initialise the kernel.
- *
- * @param[in] input Source image. Data types supported: U8.
- * @param[out] output Output image. Data types supported: U8.
- * @param[in] threshold Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3.
- * @param[in] non_max_suppression True if non-maxima suppresion is applied, false otherwise.
- * @param[in] border_mode Strategy to use for borders.
- */
- void configure(const ICLImage *input, ICLImage *output, float threshold, bool non_max_suppression, BorderMode border_mode);
- /** Initialise the kernel.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source image. Data types supported: U8.
- * @param[out] output Output image. Data types supported: U8.
- * @param[in] threshold Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3.
- * @param[in] non_max_suppression True if non-maxima suppresion is applied, false otherwise.
- * @param[in] border_mode Strategy to use for borders.
- */
- void configure(const CLCompileContext &compile_context, const ICLImage *input, ICLImage *output, float threshold, bool non_max_suppression, BorderMode border_mode);
-
- // Inherited methods overridden
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- const ICLImage *_input;
- ICLImage *_output;
-};
-
-/** CL kernel to copy keypoints information to ICLKeyPointArray and counts the number of key points */
-class CLCopyToArrayKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLCopyToArrayKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLCopyToArrayKernel(const CLCopyToArrayKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLCopyToArrayKernel &operator=(const CLCopyToArrayKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLCopyToArrayKernel(CLCopyToArrayKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLCopyToArrayKernel &operator=(CLCopyToArrayKernel &&) = default;
- /** Default destructor */
- ~CLCopyToArrayKernel() = default;
-
- /** Initialise the kernel.
- *
- * @param[in] input Source image. Data types supported: U8.
- * @param[in] update_number Flag to indicate whether we need to update the number of corners
- * @param[out] corners Array of keypoints to store the results.
- * @param[out] num_buffers Number of keypoints to store the results.
- */
- void configure(const ICLImage *input, bool update_number, ICLKeyPointArray *corners, cl::Buffer *num_buffers);
- /** Initialise the kernel.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source image. Data types supported: U8.
- * @param[in] update_number Flag to indicate whether we need to update the number of corners
- * @param[out] corners Array of keypoints to store the results.
- * @param[out] num_buffers Number of keypoints to store the results.
- */
- void configure(const CLCompileContext &compile_context, const ICLImage *input, bool update_number, ICLKeyPointArray *corners, cl::Buffer *num_buffers);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLImage *_input; /**< source image */
- ICLKeyPointArray *_corners; /**< destination array */
- cl::Buffer *_num_buffer; /**< CL memory to record number of key points in the array */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLFASTCORNERSKERNEL_H */
diff --git a/src/core/CL/kernels/CLGaussian3x3Kernel.cpp b/src/core/CL/kernels/CLGaussian3x3Kernel.cpp
deleted file mode 100644
index 40e9658ab4..0000000000
--- a/src/core/CL/kernels/CLGaussian3x3Kernel.cpp
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/core/CL/kernels/CLGaussian3x3Kernel.h"
-
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Validate.h"
-#include "src/core/helpers/WindowHelpers.h"
-
-#include <set>
-#include <string>
-
-using namespace arm_compute;
-
-BorderSize CLGaussian3x3Kernel::border_size() const
-{
- return BorderSize(1);
-}
-
-void CLGaussian3x3Kernel::configure(const ICLTensor *input, ICLTensor *output, bool border_undefined)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output, border_undefined);
-}
-
-void CLGaussian3x3Kernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
-
- _input = input;
- _output = output;
-
- // Set build options
- std::set<std::string> build_opts = { "-DMAT0=1", "-DMAT1=2", "-DMAT2=1",
- "-DMAT3=2", "-DMAT4=4", "-DMAT5=2",
- "-DMAT6=1", "-DMAT7=2", "-DMAT8=1",
- "-DSCALE=16", "-DDATA_TYPE_OUT=uchar"
- };
-
- // Create kernel
- _kernel = create_kernel(compile_context, "convolution3x3_static", build_opts);
-
- // Configure kernel window
- constexpr unsigned int num_elems_processed_per_iteration = 8;
- constexpr unsigned int num_elems_written_per_iteration = 8;
- constexpr unsigned int num_elems_read_per_iteration = 16;
- constexpr unsigned int num_rows_read_per_iteration = 3;
-
- Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size());
-
- AccessWindowRectangle input_access(input->info(), -border_size().left, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration);
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration);
-
- update_window_and_padding(win, input_access, output_access);
-
- output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
-
- ICLKernel::configure_internal(win);
-}
diff --git a/src/core/CL/kernels/CLGaussian3x3Kernel.h b/src/core/CL/kernels/CLGaussian3x3Kernel.h
deleted file mode 100644
index 139b05d44c..0000000000
--- a/src/core/CL/kernels/CLGaussian3x3Kernel.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGAUSSIAN3X3KERNEL_H
-#define ARM_COMPUTE_CLGAUSSIAN3X3KERNEL_H
-
-#include "src/core/CL/ICLSimple2DKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the Gaussian 3x3 filter kernel.
- *
- */
-class CLGaussian3x3Kernel : public ICLSimple2DKernel
-{
-public:
- /** Initialise the kernel's input and output.
- *
- * @param[in] input An input tensor. Data types supported: U8
- * @param[out] output The output tensor. Data types supported: U8.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input An input tensor. Data types supported: U8
- * @param[out] output The output tensor. Data types supported: U8.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined);
-
- // Inherited methods overridden:
- BorderSize border_size() const override;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLGAUSSIAN3X3KERNEL_H */
diff --git a/src/core/CL/kernels/CLGaussian5x5Kernel.cpp b/src/core/CL/kernels/CLGaussian5x5Kernel.cpp
deleted file mode 100644
index 46a7576154..0000000000
--- a/src/core/CL/kernels/CLGaussian5x5Kernel.cpp
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/core/CL/kernels/CLGaussian5x5Kernel.h"
-
-#include <cstdint>
-
-using namespace arm_compute;
-
-void CLGaussian5x5HorKernel::configure(const ICLTensor *input, ICLTensor *output, bool border_undefined)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output, border_undefined);
-}
-
-void CLGaussian5x5HorKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined)
-{
- const std::array<int16_t, 5> matrix = { 1, 4, 6, 4, 1 };
-
- // Set arguments
- CLSeparableConvolution5x5HorKernel::configure(compile_context, input, output, matrix.data(), border_undefined);
-}
-
-void CLGaussian5x5VertKernel::configure(const ICLTensor *input, ICLTensor *output, bool border_undefined)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output, border_undefined);
-}
-
-void CLGaussian5x5VertKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined)
-{
- const uint32_t scale = 256;
- const std::array<int16_t, 5> matrix = { 1, 4, 6, 4, 1 };
-
- // Set arguments
- CLSeparableConvolution5x5VertKernel::configure(compile_context, input, output, matrix.data(), scale, border_undefined);
-}
diff --git a/src/core/CL/kernels/CLGaussian5x5Kernel.h b/src/core/CL/kernels/CLGaussian5x5Kernel.h
deleted file mode 100644
index 711710b3b3..0000000000
--- a/src/core/CL/kernels/CLGaussian5x5Kernel.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGAUSSIAN5X5KERNEL_H
-#define ARM_COMPUTE_CLGAUSSIAN5X5KERNEL_H
-
-#include "src/core/CL/kernels/CLConvolutionKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the kernel to run the horizontal pass of 5x5 Gaussian filter on a tensor. */
-class CLGaussian5x5HorKernel : public CLSeparableConvolution5x5HorKernel
-{
-public:
- /** Initialise the kernel's source, destination and border.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor. Data types supported: S16.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
- /** Initialise the kernel's source, destination and border.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor. Data types supported: S16.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined);
-
-private:
- //Make the configure method of the parent class private
- using CLSeparableConvolution5x5HorKernel::configure;
-};
-
-/** Interface for the kernel to run the vertical pass of 5x5 Gaussian filter on a tensor. */
-class CLGaussian5x5VertKernel : public CLSeparableConvolution5x5VertKernel
-{
-public:
- /** Initialise the kernel's source, destination and border.
- *
- * @param[in] input Input tensor(output of horizontal pass). Data types supported: S16.
- * @param[out] output Destination tensor. Data types supported: U8.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
- /** Initialise the kernel's source, destination and border.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor(output of horizontal pass). Data types supported: S16.
- * @param[out] output Destination tensor. Data types supported: U8.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined);
-
-private:
- //Make the configure method of the parent class private
- using CLSeparableConvolution5x5VertKernel::configure;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLGAUSSIAN5X5KERNEL_H */
diff --git a/src/core/CL/kernels/CLGaussianPyramidKernel.cpp b/src/core/CL/kernels/CLGaussianPyramidKernel.cpp
deleted file mode 100644
index 065f7f7e92..0000000000
--- a/src/core/CL/kernels/CLGaussianPyramidKernel.cpp
+++ /dev/null
@@ -1,247 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/core/CL/kernels/CLGaussianPyramidKernel.h"
-
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-#include "src/core/helpers/WindowHelpers.h"
-#include "support/StringSupport.h"
-
-using namespace arm_compute;
-
-CLGaussianPyramidHorKernel::CLGaussianPyramidHorKernel()
- : _l2_load_offset(0)
-{
-}
-
-BorderSize CLGaussianPyramidHorKernel::border_size() const
-{
- return BorderSize{ 0, 2 };
-}
-
-void CLGaussianPyramidHorKernel::configure(const ICLTensor *input, ICLTensor *output)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output);
-}
-
-void CLGaussianPyramidHorKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U16);
- ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) != output->info()->dimension(1));
-
- for(size_t i = 2; i < Coordinates::num_max_dimensions; ++i)
- {
- ARM_COMPUTE_ERROR_ON(input->info()->dimension(i) != output->info()->dimension(i));
- }
-
- _input = input;
- _output = output;
-
- // Create kernel
- const std::string kernel_name = std::string("gaussian1x5_sub_x");
- _kernel = create_kernel(compile_context, kernel_name);
-
- // Configure kernel window
- constexpr unsigned int num_elems_processed_per_iteration = 16;
- constexpr unsigned int num_elems_read_per_iteration = 20;
- constexpr unsigned int num_elems_written_per_iteration = 8;
- const float scale_x = static_cast<float>(output->info()->dimension(0)) / input->info()->dimension(0);
-
- Window win = calculate_max_window_horizontal(*input->info(), Steps(num_elems_processed_per_iteration));
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration, scale_x);
-
- // Sub sampling selects odd pixels (1, 3, 5, ...) for images with even
- // width and even pixels (0, 2, 4, ...) for images with odd width. (Whether
- // a pixel is even or odd is determined based on the tensor shape not the
- // valid region!)
- // Thus the offset from which the first pixel (L2) for the convolution is
- // loaded depends on the anchor and shape of the valid region.
- // In the case of an even shape (= even image width) we need to load L2
- // from -2 if the anchor is odd and from -1 if the anchor is even. That
- // makes sure that L2 is always loaded from an odd pixel.
- // On the other hand, for an odd shape (= odd image width) we need to load
- // L2 from -1 if the anchor is odd and from -2 if the anchor is even to
- // achieve the opposite effect.
- // The condition can be simplified to checking whether anchor + shape is
- // odd (-2) or even (-1) as only adding an odd and an even number will have
- // an odd result.
- _l2_load_offset = -border_size().left;
-
- if((_input->info()->valid_region().anchor[0] + _input->info()->valid_region().shape[0]) % 2 == 0)
- {
- _l2_load_offset += 1;
- }
-
- update_window_and_padding(win,
- AccessWindowHorizontal(input->info(), _l2_load_offset, num_elems_read_per_iteration),
- output_access);
-
- output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape()));
-
- ICLKernel::configure_internal(win);
-
- // Set config_id for enabling LWS tuning
- _config_id = kernel_name;
- _config_id += "_";
- _config_id += lower_string(string_from_data_type(input->info()->data_type()));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(1));
- _config_id += "_";
- _config_id += support::cpp11::to_string(output->info()->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(output->info()->dimension(1));
-}
-
-void CLGaussianPyramidHorKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- Window win_in(window);
- win_in.shift(Window::DimX, _l2_load_offset);
-
- //The output is half the width of the input:
- Window win_out(window);
- win_out.scale(Window::DimX, 0.5f);
-
- Window slice_in = win_in.first_slice_window_2D();
- Window slice_out = win_out.first_slice_window_2D();
-
- do
- {
- unsigned int idx = 0;
- add_2D_tensor_argument(idx, _input, slice_in);
- add_2D_tensor_argument(idx, _output, slice_out);
- enqueue(queue, *this, slice_out, lws_hint());
- }
- while(win_in.slide_window_slice_2D(slice_in) && win_out.slide_window_slice_2D(slice_out));
-}
-
-CLGaussianPyramidVertKernel::CLGaussianPyramidVertKernel()
- : _t2_load_offset(0)
-{
-}
-
-BorderSize CLGaussianPyramidVertKernel::border_size() const
-{
- return BorderSize{ 2, 0 };
-}
-
-void CLGaussianPyramidVertKernel::configure(const ICLTensor *input, ICLTensor *output)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output);
-}
-
-void CLGaussianPyramidVertKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U16);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != output->info()->dimension(0));
-
- for(size_t i = 2; i < Coordinates::num_max_dimensions; ++i)
- {
- ARM_COMPUTE_ERROR_ON(input->info()->dimension(i) != output->info()->dimension(i));
- }
-
- _input = input;
- _output = output;
-
- // Create kernel
- const std::string kernel_name = std::string("gaussian5x1_sub_y");
- _kernel = create_kernel(compile_context, "gaussian5x1_sub_y");
-
- // Configure kernel window
- constexpr unsigned int num_elems_processed_per_iteration = 8;
- constexpr unsigned int num_rows_processed_per_iteration = 2;
- constexpr unsigned int num_elems_written_per_iteration = 8;
- constexpr unsigned int num_elems_read_per_iteration = 8;
- constexpr unsigned int num_rows_per_iteration = 5;
-
- const float scale_y = static_cast<float>(output->info()->dimension(1)) / input->info()->dimension(1);
-
- Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration, num_rows_processed_per_iteration));
- AccessWindowRectangle output_access(output->info(), 0, 0, num_elems_written_per_iteration, num_rows_per_iteration, 1.f, scale_y);
-
- // Determine whether we need to load even or odd rows. See above for a
- // detailed explanation.
- _t2_load_offset = -border_size().top;
-
- if((_input->info()->valid_region().anchor[1] + _input->info()->valid_region().shape[1]) % 2 == 0)
- {
- _t2_load_offset += 1;
- }
-
- update_window_and_padding(win,
- AccessWindowRectangle(input->info(), 0, _t2_load_offset, num_elems_read_per_iteration, num_rows_per_iteration),
- output_access);
-
- output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape()));
-
- ICLKernel::configure_internal(win);
-
- // Set config_id for enabling LWS tuning
- _config_id = kernel_name;
- _config_id += "_";
- _config_id += lower_string(string_from_data_type(input->info()->data_type()));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(1));
- _config_id += "_";
- _config_id += support::cpp11::to_string(output->info()->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(output->info()->dimension(1));
-}
-
-void CLGaussianPyramidVertKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
- ARM_COMPUTE_ERROR_ON(window.x().step() != 8);
- ARM_COMPUTE_ERROR_ON(window.y().step() % 2);
-
- Window win_in(window);
- win_in.shift(Window::DimY, _t2_load_offset);
-
- Window win_out(window);
- win_out.scale(Window::DimY, 0.5f);
-
- Window slice_in = win_in.first_slice_window_2D();
- Window slice_out = win_out.first_slice_window_2D();
-
- do
- {
- unsigned int idx = 0;
- add_2D_tensor_argument(idx, _input, slice_in);
- add_2D_tensor_argument(idx, _output, slice_out);
- enqueue(queue, *this, slice_out, lws_hint());
- }
- while(win_in.slide_window_slice_2D(slice_in) && win_out.slide_window_slice_2D(slice_out));
-}
diff --git a/src/core/CL/kernels/CLGaussianPyramidKernel.h b/src/core/CL/kernels/CLGaussianPyramidKernel.h
deleted file mode 100644
index a6595440f6..0000000000
--- a/src/core/CL/kernels/CLGaussianPyramidKernel.h
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGAUSSIANPYRAMIDKERNEL_H
-#define ARM_COMPUTE_CLGAUSSIANPYRAMIDKERNEL_H
-
-#include "src/core/CL/ICLSimpleKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to perform a Gaussian filter and half scaling across width (horizontal pass) */
-class CLGaussianPyramidHorKernel : public ICLSimpleKernel
-{
-public:
- /** Default constructor */
- CLGaussianPyramidHorKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGaussianPyramidHorKernel(const CLGaussianPyramidHorKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGaussianPyramidHorKernel &operator=(const CLGaussianPyramidHorKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLGaussianPyramidHorKernel(CLGaussianPyramidHorKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLGaussianPyramidHorKernel &operator=(CLGaussianPyramidHorKernel &&) = default;
- /** Default destructor */
- ~CLGaussianPyramidHorKernel() = default;
-
- /** Initialise the kernel's source, destination and border mode.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor. Output should have half the input width. Data types supported: U16.
- */
- void configure(const ICLTensor *input, ICLTensor *output);
- /** Initialise the kernel's source, destination and border mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor. Output should have half the input width. Data types supported: U16.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- int _l2_load_offset;
-};
-
-/** OpenCL kernel to perform a Gaussian filter and half scaling across height (vertical pass) */
-class CLGaussianPyramidVertKernel : public ICLSimpleKernel
-{
-public:
- /** Default constructor */
- CLGaussianPyramidVertKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGaussianPyramidVertKernel(const CLGaussianPyramidVertKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGaussianPyramidVertKernel &operator=(const CLGaussianPyramidVertKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLGaussianPyramidVertKernel(CLGaussianPyramidVertKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLGaussianPyramidVertKernel &operator=(CLGaussianPyramidVertKernel &&) = default;
- /** Default destructor */
- ~CLGaussianPyramidVertKernel() = default;
-
- /** Initialise the kernel's source, destination and border mode.
- *
- * @param[in] input Source tensor. Data types supported: U16.
- * @param[out] output Destination tensor. Output should have half the input height. Data types supported: U8.
- */
- void configure(const ICLTensor *input, ICLTensor *output);
- /** Initialise the kernel's source, destination and border mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U16.
- * @param[out] output Destination tensor. Output should have half the input height. Data types supported: U8.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- int _t2_load_offset;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLGAUSSIANPYRAMIDKERNEL_H */
diff --git a/src/core/CL/kernels/CLHOGDescriptorKernel.cpp b/src/core/CL/kernels/CLHOGDescriptorKernel.cpp
deleted file mode 100644
index cd3f1ee216..0000000000
--- a/src/core/CL/kernels/CLHOGDescriptorKernel.cpp
+++ /dev/null
@@ -1,237 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/core/CL/kernels/CLHOGDescriptorKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Validate.h"
-#include "src/core/helpers/WindowHelpers.h"
-#include "support/StringSupport.h"
-
-#include <set>
-#include <sstream>
-#include <string>
-
-using namespace arm_compute;
-
-CLHOGOrientationBinningKernel::CLHOGOrientationBinningKernel()
- : _input_magnitude(nullptr), _input_phase(nullptr), _output(nullptr), _cell_size()
-{
-}
-
-void CLHOGOrientationBinningKernel::configure(const ICLTensor *input_magnitude, const ICLTensor *input_phase, ICLTensor *output, const HOGInfo *hog_info)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input_magnitude, input_phase, output, hog_info);
-}
-
-void CLHOGOrientationBinningKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input_magnitude, const ICLTensor *input_phase, ICLTensor *output, const HOGInfo *hog_info)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input_magnitude, 1, DataType::S16);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input_phase, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON(hog_info == nullptr);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, hog_info->num_bins(), DataType::F32);
- ARM_COMPUTE_ERROR_ON(input_magnitude->info()->dimension(Window::DimX) != input_phase->info()->dimension(Window::DimX));
- ARM_COMPUTE_ERROR_ON(input_magnitude->info()->dimension(Window::DimY) != input_phase->info()->dimension(Window::DimY));
-
- _input_magnitude = input_magnitude;
- _input_phase = input_phase;
- _output = output;
- _cell_size = hog_info->cell_size();
-
- float phase_scale = (PhaseType::SIGNED == hog_info->phase_type() ? hog_info->num_bins() / 360.0f : hog_info->num_bins() / 180.0f);
- phase_scale *= (PhaseType::SIGNED == hog_info->phase_type() ? 360.0f / 255.0f : 1.0f);
-
- std::stringstream args_str;
- args_str << "-DCELL_WIDTH=" << hog_info->cell_size().width << " ";
- args_str << "-DCELL_HEIGHT=" << hog_info->cell_size().height << " ";
- args_str << "-DNUM_BINS=" << hog_info->num_bins() << " ";
- args_str << "-DPHASE_SCALE=" << phase_scale << " ";
-
- // Construct kernel name
- std::set<std::string> build_opts = {};
- build_opts.insert(args_str.str());
-
- // Create kernel
- const std::string kernel_name = std::string("hog_orientation_binning");
- _kernel = create_kernel(compile_context, kernel_name, build_opts);
-
- constexpr unsigned int num_elems_processed_per_iteration = 1;
- constexpr unsigned int num_elems_read_per_iteration = 1;
- const unsigned int num_rows_read_per_iteration = hog_info->cell_size().height;
- constexpr unsigned int num_elems_written_per_iteration = 1;
-
- // Configure kernel window
- Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration));
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration);
-
- update_window_and_padding(win,
- AccessWindowRectangle(input_magnitude->info(), 0, 0, num_elems_read_per_iteration, num_rows_read_per_iteration),
- AccessWindowRectangle(input_phase->info(), 0, 0, num_elems_read_per_iteration, num_rows_read_per_iteration),
- output_access);
-
- output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape()));
-
- ICLKernel::configure_internal(win);
-
- // Set config_id for enabling LWS tuning
- _config_id = kernel_name;
- _config_id += "_";
- _config_id += lower_string(string_from_data_type(input_magnitude->info()->data_type()));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input_magnitude->info()->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input_magnitude->info()->dimension(1));
- _config_id += "_";
- _config_id += support::cpp11::to_string(output->info()->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(output->info()->dimension(1));
-}
-
-void CLHOGOrientationBinningKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- Window slice = window.first_slice_window_2D();
- do
- {
- // Compute slice for the magnitude and phase tensors
- Window slice_mag_phase = window.first_slice_window_2D();
- slice_mag_phase.set(Window::DimX, Window::Dimension(window.x().start() * _cell_size.width, window.x().start() * _cell_size.width, _cell_size.width));
- slice_mag_phase.set(Window::DimY, Window::Dimension(window.y().start() * _cell_size.height, window.y().start() * _cell_size.height, _cell_size.height));
-
- unsigned int idx = 0;
- add_2D_tensor_argument(idx, _input_magnitude, slice_mag_phase);
- add_2D_tensor_argument(idx, _input_phase, slice_mag_phase);
- add_2D_tensor_argument(idx, _output, slice);
-
- enqueue(queue, *this, slice, lws_hint());
- }
- while(window.slide_window_slice_2D(slice));
-}
-
-CLHOGBlockNormalizationKernel::CLHOGBlockNormalizationKernel()
- : _input(nullptr), _output(nullptr), _num_cells_per_block_stride()
-{
-}
-
-void CLHOGBlockNormalizationKernel::configure(const ICLTensor *input, ICLTensor *output, const HOGInfo *hog_info)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output, hog_info);
-}
-
-void CLHOGBlockNormalizationKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const HOGInfo *hog_info)
-{
- ARM_COMPUTE_ERROR_ON(hog_info == nullptr);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, hog_info->num_bins(), DataType::F32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_NOT_IN(output, DataType::F32);
-
- // Number of cells per block
- const Size2D num_cells_per_block(hog_info->block_size().width / hog_info->cell_size().width,
- hog_info->block_size().height / hog_info->cell_size().height);
-
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, hog_info->num_bins() * num_cells_per_block.area(), DataType::F32);
-
- // Number of cells per block stride
- const Size2D num_cells_per_block_stride(hog_info->block_stride().width / hog_info->cell_size().width,
- hog_info->block_stride().height / hog_info->cell_size().height);
-
- _input = input;
- _output = output;
- _num_cells_per_block_stride = num_cells_per_block_stride;
-
- std::stringstream args_str;
- args_str << "-DL2_HYST_THRESHOLD=" << hog_info->l2_hyst_threshold() << " ";
- args_str << "-DNUM_CELLS_PER_BLOCK_HEIGHT=" << num_cells_per_block.height << " ";
- args_str << "-DNUM_BINS_PER_BLOCK_X=" << num_cells_per_block.width *hog_info->num_bins() << " ";
- args_str << "-DNUM_BINS_PER_BLOCK=" << _output->info()->num_channels() << " ";
- args_str << "-DL2_NORM=" << static_cast<int>(HOGNormType::L2_NORM) << " ";
- args_str << "-DL1_NORM=" << static_cast<int>(HOGNormType::L1_NORM) << " ";
- args_str << "-DL2HYS_NORM=" << static_cast<int>(HOGNormType::L2HYS_NORM) << " ";
- args_str << "-DHOG_NORM_TYPE=" << static_cast<int>(hog_info->normalization_type()) << " ";
-
- // Construct kernel name
- std::set<std::string> build_opts = {};
- build_opts.insert(args_str.str());
-
- const std::string kernel_name = std::string("hog_block_normalization");
- _kernel = create_kernel(compile_context, kernel_name, build_opts);
-
- constexpr unsigned int num_elems_processed_per_iteration = 1;
- constexpr unsigned int num_elems_read_per_iteration = 1;
- const unsigned int num_rows_read_per_iteration = num_cells_per_block.height;
- constexpr unsigned int num_elems_written_per_iteration = 1;
- const unsigned int num_rows_written_per_iteration = num_cells_per_block.height;
-
- // Configure kernel window
- Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration));
- AccessWindowRectangle output_access(output->info(), 0, 0, num_elems_written_per_iteration, num_rows_written_per_iteration);
-
- update_window_and_padding(win,
- AccessWindowRectangle(input->info(), 0, 0, num_elems_read_per_iteration, num_rows_read_per_iteration),
- output_access);
-
- output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
-
- ICLKernel::configure_internal(win);
-
- // Set config_id for enabling LWS tuning
- _config_id = kernel_name;
- _config_id += "_";
- _config_id += lower_string(string_from_data_type(input->info()->data_type()));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(1));
- _config_id += "_";
- _config_id += support::cpp11::to_string(output->info()->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(output->info()->dimension(1));
-}
-
-void CLHOGBlockNormalizationKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- Window slice = window.first_slice_window_2D();
- do
- {
- // Compute slice for the magnitude and phase tensors
- Window slice_in = window.first_slice_window_2D();
- slice_in.set_dimension_step(Window::DimX, _num_cells_per_block_stride.width);
- slice_in.set_dimension_step(Window::DimY, _num_cells_per_block_stride.height);
-
- unsigned int idx = 0;
- add_2D_tensor_argument(idx, _input, slice_in);
- add_2D_tensor_argument(idx, _output, slice);
-
- enqueue(queue, *this, slice, lws_hint());
- }
- while(window.slide_window_slice_2D(slice));
-}
diff --git a/src/core/CL/kernels/CLHOGDescriptorKernel.h b/src/core/CL/kernels/CLHOGDescriptorKernel.h
deleted file mode 100644
index eee2fa36bc..0000000000
--- a/src/core/CL/kernels/CLHOGDescriptorKernel.h
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLHOGDESCRIPTORKERNEL_H
-#define ARM_COMPUTE_CLHOGDESCRIPTORKERNEL_H
-
-#include "arm_compute/core/IHOG.h"
-#include "arm_compute/core/Size2D.h"
-#include "src/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** OpenCL kernel to perform HOG Orientation Binning */
-class CLHOGOrientationBinningKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLHOGOrientationBinningKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLHOGOrientationBinningKernel(const CLHOGOrientationBinningKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLHOGOrientationBinningKernel &operator=(const CLHOGOrientationBinningKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLHOGOrientationBinningKernel(CLHOGOrientationBinningKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLHOGOrientationBinningKernel &operator=(CLHOGOrientationBinningKernel &&) = default;
- /** Default destructor */
- ~CLHOGOrientationBinningKernel() = default;
-
- /** Initialise the kernel's inputs, output and HOG's metadata
- *
- * @param[in] input_magnitude Input tensor which stores the magnitude of the gradient for each pixel. Data type supported: S16.
- * @param[in] input_phase Input tensor which stores the phase of the gradient for each pixel. Data type supported: U8
- * @param[out] output Output tensor which stores the local HOG for each cell. DataType supported: F32. Number of channels supported: equal to the number of histogram bins per cell
- * @param[in] hog_info HOG's metadata
- */
- void configure(const ICLTensor *input_magnitude, const ICLTensor *input_phase, ICLTensor *output, const HOGInfo *hog_info);
- /** Initialise the kernel's inputs, output and HOG's metadata
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input_magnitude Input tensor which stores the magnitude of the gradient for each pixel. Data type supported: S16.
- * @param[in] input_phase Input tensor which stores the phase of the gradient for each pixel. Data type supported: U8
- * @param[out] output Output tensor which stores the local HOG for each cell. DataType supported: F32. Number of channels supported: equal to the number of histogram bins per cell
- * @param[in] hog_info HOG's metadata
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input_magnitude, const ICLTensor *input_phase, ICLTensor *output, const HOGInfo *hog_info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input_magnitude;
- const ICLTensor *_input_phase;
- ICLTensor *_output;
- Size2D _cell_size;
-};
-
-/** OpenCL kernel to perform HOG block normalization */
-class CLHOGBlockNormalizationKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLHOGBlockNormalizationKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLHOGBlockNormalizationKernel(const CLHOGBlockNormalizationKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLHOGBlockNormalizationKernel &operator=(const CLHOGBlockNormalizationKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLHOGBlockNormalizationKernel(CLHOGBlockNormalizationKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLHOGBlockNormalizationKernel &operator=(CLHOGBlockNormalizationKernel &&) = default;
- /** Default destructor */
- ~CLHOGBlockNormalizationKernel() = default;
-
- /** Initialise the kernel's input, output and HOG's metadata
- *
- * @param[in] input Input tensor which stores the local HOG for each cell. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per cell
- * @param[out] output Output tensor which stores the normalised blocks. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block
- * @param[in] hog_info HOG's metadata
- */
- void configure(const ICLTensor *input, ICLTensor *output, const HOGInfo *hog_info);
- /** Initialise the kernel's input, output and HOG's metadata
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor which stores the local HOG for each cell. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per cell
- * @param[out] output Output tensor which stores the normalised blocks. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block
- * @param[in] hog_info HOG's metadata
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const HOGInfo *hog_info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- Size2D _num_cells_per_block_stride;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLHOGDESCRIPTORKERNEL_H */
diff --git a/src/core/CL/kernels/CLHOGDetectorKernel.cpp b/src/core/CL/kernels/CLHOGDetectorKernel.cpp
deleted file mode 100644
index 861155b9a2..0000000000
--- a/src/core/CL/kernels/CLHOGDetectorKernel.cpp
+++ /dev/null
@@ -1,146 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/core/CL/kernels/CLHOGDetectorKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLHOG.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Validate.h"
-#include "src/core/helpers/WindowHelpers.h"
-#include "support/StringSupport.h"
-
-using namespace arm_compute;
-
-CLHOGDetectorKernel::CLHOGDetectorKernel()
- : _input(nullptr), _detection_windows(), _num_detection_windows(nullptr)
-{
-}
-
-void CLHOGDetectorKernel::configure(const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, cl::Buffer *num_detection_windows, const Size2D &detection_window_stride,
- float threshold, uint16_t idx_class)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, hog, detection_windows, num_detection_windows, detection_window_stride, threshold, idx_class);
-}
-
-void CLHOGDetectorKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, cl::Buffer *num_detection_windows,
- const Size2D &detection_window_stride,
- float threshold, uint16_t idx_class)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_NOT_IN(input, DataType::F32);
- ARM_COMPUTE_ERROR_ON(hog == nullptr);
- ARM_COMPUTE_ERROR_ON(detection_windows == nullptr);
- ARM_COMPUTE_ERROR_ON(num_detection_windows == nullptr);
- ARM_COMPUTE_ERROR_ON((detection_window_stride.width % hog->info()->block_stride().width) != 0);
- ARM_COMPUTE_ERROR_ON((detection_window_stride.height % hog->info()->block_stride().height) != 0);
-
- const Size2D &detection_window_size = hog->info()->detection_window_size();
- const Size2D &block_size = hog->info()->block_size();
- const Size2D &block_stride = hog->info()->block_stride();
-
- _input = input;
- _detection_windows = detection_windows;
- _num_detection_windows = num_detection_windows;
-
- const unsigned int num_bins_per_descriptor_x = ((detection_window_size.width - block_size.width) / block_stride.width + 1) * input->info()->num_channels();
- const unsigned int num_blocks_per_descriptor_y = (detection_window_size.height - block_size.height) / block_stride.height + 1;
-
- ARM_COMPUTE_ERROR_ON((num_bins_per_descriptor_x * num_blocks_per_descriptor_y + 1) != hog->info()->descriptor_size());
-
- std::stringstream args_str;
- args_str << "-DNUM_BLOCKS_PER_DESCRIPTOR_Y=" << num_blocks_per_descriptor_y << " ";
- args_str << "-DNUM_BINS_PER_DESCRIPTOR_X=" << num_bins_per_descriptor_x << " ";
- args_str << "-DTHRESHOLD=" << threshold << " ";
- args_str << "-DMAX_NUM_DETECTION_WINDOWS=" << detection_windows->max_num_values() << " ";
- args_str << "-DIDX_CLASS=" << idx_class << " ";
- args_str << "-DDETECTION_WINDOW_WIDTH=" << detection_window_size.width << " ";
- args_str << "-DDETECTION_WINDOW_HEIGHT=" << detection_window_size.height << " ";
- args_str << "-DDETECTION_WINDOW_STRIDE_WIDTH=" << detection_window_stride.width << " ";
- args_str << "-DDETECTION_WINDOW_STRIDE_HEIGHT=" << detection_window_stride.height << " ";
-
- // Construct kernel name
- std::set<std::string> build_opts = {};
- build_opts.insert(args_str.str());
-
- // Create kernel
- const std::string kernel_name = std::string("hog_detector");
- _kernel = create_kernel(compile_context, kernel_name, build_opts);
-
- // Set static kernel arguments
- unsigned int idx = num_arguments_per_2D_tensor(); // Skip the input parameters
- _kernel.setArg(idx++, hog->cl_buffer());
- _kernel.setArg(idx++, detection_windows->cl_buffer());
- _kernel.setArg(idx++, *_num_detection_windows);
-
- // Get the number of blocks along the x and y directions of the input tensor
- const ValidRegion &valid_region = input->info()->valid_region();
- const size_t num_blocks_x = valid_region.shape[0];
- const size_t num_blocks_y = valid_region.shape[1];
-
- // Get the number of blocks along the x and y directions of the detection window
- const size_t num_blocks_per_detection_window_x = detection_window_size.width / block_stride.width;
- const size_t num_blocks_per_detection_window_y = detection_window_size.height / block_stride.height;
-
- const size_t window_step_x = detection_window_stride.width / block_stride.width;
- const size_t window_step_y = detection_window_stride.height / block_stride.height;
-
- // Configure kernel window
- Window win;
- win.set(Window::DimX, Window::Dimension(0, floor_to_multiple(num_blocks_x - num_blocks_per_detection_window_x, window_step_x) + window_step_x, window_step_x));
- win.set(Window::DimY, Window::Dimension(0, floor_to_multiple(num_blocks_y - num_blocks_per_detection_window_y, window_step_y) + window_step_y, window_step_y));
-
- constexpr unsigned int num_elems_read_per_iteration = 1;
- const unsigned int num_rows_read_per_iteration = num_blocks_per_descriptor_y;
-
- update_window_and_padding(win, AccessWindowRectangle(input->info(), 0, 0, num_elems_read_per_iteration, num_rows_read_per_iteration));
-
- ICLKernel::configure_internal(win);
-
- // Set config_id for enabling LWS tuning
- _config_id = kernel_name;
- _config_id += "_";
- _config_id += lower_string(string_from_data_type(input->info()->data_type()));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(1));
-}
-
-void CLHOGDetectorKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- Window slice = window.first_slice_window_2D();
- do
- {
- unsigned int idx = 0;
- add_2D_tensor_argument(idx, _input, slice);
-
- enqueue(queue, *this, slice, lws_hint());
- }
- while(window.slide_window_slice_2D(slice));
-}
diff --git a/src/core/CL/kernels/CLHOGDetectorKernel.h b/src/core/CL/kernels/CLHOGDetectorKernel.h
deleted file mode 100644
index c28e6ebe74..0000000000
--- a/src/core/CL/kernels/CLHOGDetectorKernel.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLHOGDETECTORKERNEL_H
-#define ARM_COMPUTE_CLHOGDETECTORKERNEL_H
-
-#include "arm_compute/core/CL/ICLArray.h"
-#include "arm_compute/core/CL/ICLHOG.h"
-#include "arm_compute/core/CL/OpenCL.h"
-#include "src/core/CL/ICLKernel.h"
-
-namespace cl
-{
-class Buffer;
-}
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to perform HOG detector kernel using linear SVM */
-class CLHOGDetectorKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLHOGDetectorKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLHOGDetectorKernel(const CLHOGDetectorKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLHOGDetectorKernel &operator=(const CLHOGDetectorKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLHOGDetectorKernel(CLHOGDetectorKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLHOGDetectorKernel &operator=(CLHOGDetectorKernel &&) = default;
- /** Default destructor */
- ~CLHOGDetectorKernel() = default;
-
- /** Initialise the kernel's input, HOG data-object, detection window, the stride of the detection window, the threshold and index of the object to detect
- *
- * @param[in] input Input tensor which stores the HOG descriptor obtained with @ref CLHOGOrientationBinningKernel. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block
- * @param[in] hog HOG data object used by @ref CLHOGOrientationBinningKernel and @ref CLHOGBlockNormalizationKernel
- * @param[out] detection_windows Array of @ref DetectionWindow. This array stores all the detected objects
- * @param[in] num_detection_windows Number of detected objects
- * @param[in] detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions.
- * It must be multiple of the hog->info()->block_stride()
- * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane
- * @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to
- */
- void configure(const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, cl::Buffer *num_detection_windows, const Size2D &detection_window_stride, float threshold = 0.0f,
- uint16_t idx_class = 0);
- /** Initialise the kernel's input, HOG data-object, detection window, the stride of the detection window, the threshold and index of the object to detect
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor which stores the HOG descriptor obtained with @ref CLHOGOrientationBinningKernel. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block
- * @param[in] hog HOG data object used by @ref CLHOGOrientationBinningKernel and @ref CLHOGBlockNormalizationKernel
- * @param[out] detection_windows Array of @ref DetectionWindow. This array stores all the detected objects
- * @param[in] num_detection_windows Number of detected objects
- * @param[in] detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions.
- * It must be multiple of the hog->info()->block_stride()
- * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane
- * @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, cl::Buffer *num_detection_windows,
- const Size2D &detection_window_stride, float threshold = 0.0f,
- uint16_t idx_class = 0);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue);
-
-private:
- const ICLTensor *_input;
- ICLDetectionWindowArray *_detection_windows;
- cl::Buffer *_num_detection_windows;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLHOGDETECTORKERNEL_H */
diff --git a/src/core/CL/kernels/CLHarrisCornersKernel.cpp b/src/core/CL/kernels/CLHarrisCornersKernel.cpp
deleted file mode 100644
index cbc056fb77..0000000000
--- a/src/core/CL/kernels/CLHarrisCornersKernel.cpp
+++ /dev/null
@@ -1,149 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/core/CL/kernels/CLHarrisCornersKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Validate.h"
-#include "src/core/AccessWindowStatic.h"
-#include "src/core/helpers/AutoConfiguration.h"
-#include "src/core/helpers/WindowHelpers.h"
-#include "support/StringSupport.h"
-
-#include <set>
-#include <sstream>
-#include <string>
-
-using namespace arm_compute;
-
-CLHarrisScoreKernel::CLHarrisScoreKernel()
- : _input1(nullptr), _input2(nullptr), _output(nullptr), _sensitivity(), _strength_thresh(), _norm_factor(), _border_size(0)
-{
-}
-
-BorderSize CLHarrisScoreKernel::border_size() const
-{
- return _border_size;
-}
-
-void CLHarrisScoreKernel::configure(const ICLImage *input1, const ICLImage *input2, ICLImage *output,
- int32_t block_size, float norm_factor, float strength_thresh, float sensitivity,
- bool border_undefined)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output, block_size, norm_factor, strength_thresh, sensitivity, border_undefined);
-}
-
-void CLHarrisScoreKernel::configure(const CLCompileContext &compile_context, const ICLImage *input1, const ICLImage *input2, ICLImage *output,
- int32_t block_size, float norm_factor, float strength_thresh, float sensitivity,
- bool border_undefined)
-{
- ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input1);
- ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input2);
- ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(output);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::S16, DataType::S32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::S16, DataType::S32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F32);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2);
- ARM_COMPUTE_ERROR_ON(!(block_size == 3 || block_size == 5 || block_size == 7));
- ARM_COMPUTE_ERROR_ON(0.0f == norm_factor);
-
- _input1 = input1;
- _input2 = input2;
- _output = output;
- _sensitivity = sensitivity;
- _strength_thresh = strength_thresh;
- _norm_factor = norm_factor;
- _border_size = BorderSize(block_size / 2);
-
- // Select kernel
- std::stringstream harris_score_kernel_name;
- harris_score_kernel_name << "harris_score_" << block_size << "x" << block_size;
-
- // Create build options
- std::set<std::string> build_opts = { ("-DDATA_TYPE=" + get_cl_type_from_data_type(input1->info()->data_type())) };
-
- // Create kernel
- _kernel = create_kernel(compile_context, harris_score_kernel_name.str(), build_opts);
-
- // Set static kernel arguments
- unsigned int idx = 3 * num_arguments_per_2D_tensor(); //Skip the input and output parameters
- _kernel.setArg(idx++, sensitivity);
- _kernel.setArg(idx++, strength_thresh);
- _kernel.setArg(idx++, norm_factor);
-
- // Configure kernel window
- constexpr unsigned int num_elems_processed_per_iteration = 4;
- constexpr unsigned int num_elems_written_per_iteration = 4;
- const unsigned int num_elems_read_per_iteration = block_size == 7 ? 10 : 8;
- const unsigned int num_rows_read_per_iteration = block_size;
-
- Window win = calculate_max_window(*_input1->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size());
-
- AccessWindowRectangle input1_access(input1->info(), -border_size().left, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration);
- AccessWindowRectangle input2_access(input2->info(), -border_size().left, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration);
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration);
-
- update_window_and_padding(win, input1_access, input2_access, output_access);
-
- ValidRegion valid_region = intersect_valid_regions(input1->info()->valid_region(), input2->info()->valid_region());
- output_access.set_valid_region(win, valid_region, border_undefined, border_size());
-
- ICLKernel::configure_internal(win);
-
- // Set config_id for enabling LWS tuning
- _config_id = harris_score_kernel_name.str();
- _config_id += "_";
- _config_id += lower_string(string_from_data_type(input1->info()->data_type()));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input1->info()->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input1->info()->dimension(1));
- _config_id += "_";
- _config_id += lower_string(string_from_data_type(input2->info()->data_type()));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input2->info()->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input2->info()->dimension(1));
-}
-
-void CLHarrisScoreKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- Window slice = window.first_slice_window_2D();
- do
- {
- unsigned int idx = 0;
- add_2D_tensor_argument(idx, _input1, slice);
- add_2D_tensor_argument(idx, _input2, slice);
- add_2D_tensor_argument(idx, _output, slice);
- enqueue(queue, *this, slice, lws_hint());
- }
- while(window.slide_window_slice_2D(slice));
-}
diff --git a/src/core/CL/kernels/CLHarrisCornersKernel.h b/src/core/CL/kernels/CLHarrisCornersKernel.h
deleted file mode 100644
index 6482b0aa4e..0000000000
--- a/src/core/CL/kernels/CLHarrisCornersKernel.h
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLHARRISCORNERSKERNEL_H
-#define ARM_COMPUTE_CLHARRISCORNERSKERNEL_H
-
-#include "src/core/CL/ICLKernel.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ICLTensor;
-using ICLImage = ICLTensor;
-
-/** Interface for the harris score kernel.
- *
- * @note The implementation supports 3, 5, and 7 for the block_size.
- */
-class CLHarrisScoreKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLHarrisScoreKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLHarrisScoreKernel(const CLHarrisScoreKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLHarrisScoreKernel &operator=(const CLHarrisScoreKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLHarrisScoreKernel(CLHarrisScoreKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLHarrisScoreKernel &operator=(CLHarrisScoreKernel &&) = default;
- /** Default destructor */
- ~CLHarrisScoreKernel() = default;
-
- /** Setup the kernel parameters
- *
- * @param[in] input1 Source image (gradient X). Data types supported S16, S32. (Must be the same as input2)
- * @param[in] input2 Source image (gradient Y). Data types supported S16, S32. (Must be the same as input1)
- * @param[out] output Destination image (harris score). Data types supported F32
- * @param[in] block_size The block window size used to compute the Harris Corner score. Supports: 3, 5 and 7
- * @param[in] norm_factor Normalization factor to use accordingly with the gradient size (Must be different from 0)
- * @param[in] strength_thresh Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel).
- * @param[in] sensitivity Sensitivity threshold k from the Harris-Stephens equation.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLImage *input1, const ICLImage *input2, ICLImage *output,
- int32_t block_size, float norm_factor, float strength_thresh, float sensitivity,
- bool border_undefined);
- /** Setup the kernel parameters
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input1 Source image (gradient X). Data types supported S16, S32. (Must be the same as input2)
- * @param[in] input2 Source image (gradient Y). Data types supported S16, S32. (Must be the same as input1)
- * @param[out] output Destination image (harris score). Data types supported F32
- * @param[in] block_size The block window size used to compute the Harris Corner score. Supports: 3, 5 and 7
- * @param[in] norm_factor Normalization factor to use accordingly with the gradient size (Must be different from 0)
- * @param[in] strength_thresh Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel).
- * @param[in] sensitivity Sensitivity threshold k from the Harris-Stephens equation.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLImage *input1, const ICLImage *input2, ICLImage *output,
- int32_t block_size, float norm_factor, float strength_thresh, float sensitivity,
- bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-protected:
- const ICLImage *_input1; /**< Source image - Gx component */
- const ICLImage *_input2; /**< Source image - Gy component */
- ICLImage *_output; /**< Source image - Harris score */
- float _sensitivity; /**< Sensitivity value */
- float _strength_thresh; /**< Threshold value */
- float _norm_factor; /**< Normalization factor */
- BorderSize _border_size; /**< Border size */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLHARRISCORNERSKERNEL_H */
diff --git a/src/core/CL/kernels/CLHistogramKernel.cpp b/src/core/CL/kernels/CLHistogramKernel.cpp
deleted file mode 100644
index ca5322aa51..0000000000
--- a/src/core/CL/kernels/CLHistogramKernel.cpp
+++ /dev/null
@@ -1,253 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/core/CL/kernels/CLHistogramKernel.h"
-
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLDistribution1D.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Validate.h"
-#include "src/core/helpers/WindowHelpers.h"
-#include "support/StringSupport.h"
-
-#include <cstring>
-#include <string>
-
-using namespace arm_compute;
-
-// each thread handle 16 pixels
-constexpr signed int pixels_per_item = 16;
-
-// local work group size in X dimension
-constexpr unsigned int local_x_size = 16;
-
-CLHistogramKernel::CLHistogramKernel()
- : _input(nullptr), _output(nullptr)
-{
-}
-
-void CLHistogramKernel::configure(const ICLImage *input, ICLDistribution1D *output)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output);
-}
-
-void CLHistogramKernel::configure(const CLCompileContext &compile_context, const ICLImage *input, ICLDistribution1D *output)
-{
- ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input);
- ARM_COMPUTE_ERROR_ON(nullptr == output);
-
- // Check input size
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
-
- // Check offset
- ARM_COMPUTE_ERROR_ON_MSG(0 > output->offset() || output->offset() > 256, "Offset is larger than the image value range.");
-
- // Check range
- ARM_COMPUTE_ERROR_ON_MSG(output->range() > 256 /* max range */, "Range larger than the image value range.");
-
- _input = input;
- _output = output;
-
- if(_input->info()->dimension(0) < pixels_per_item)
- {
- return;
- }
-
- unsigned int num_bins = _output->num_bins();
- unsigned int window_size = _output->window();
- unsigned int offset = _output->offset();
- unsigned int range = _output->range();
- unsigned int offrange = offset + range;
- unsigned int bin_size = _output->size();
- unsigned int buffer_size = bin_size + 1; // We need one extra place for pixels that don't meet the conditions
-
- // Create kernel
- bool is_fixed_size = (256 == num_bins) && (1 == window_size) && (0 == offset) && (256 == offrange);
- const std::string kernel_name = is_fixed_size ? "hist_local_kernel_fixed" : "hist_local_kernel";
- _kernel = create_kernel(compile_context, kernel_name);
-
- // Set static kernel arguments
- unsigned int idx = num_arguments_per_2D_tensor(); //Skip the input and output parameters
- _kernel.setArg(idx++, buffer_size, nullptr);
- _kernel.setArg(idx++, _output->cl_buffer());
- if(!is_fixed_size)
- {
- _kernel.setArg<cl_uint>(idx++, num_bins);
- _kernel.setArg<cl_uint>(idx++, offset);
- _kernel.setArg<cl_uint>(idx++, range);
- _kernel.setArg<cl_uint>(idx++, offrange);
- }
-
- // We only run histogram on Image, therefore only 2 dimensions here
- unsigned int end_position = (_input->info()->dimension(0) / pixels_per_item) * pixels_per_item;
-
- // Configure kernel window
- Window win;
- win.set(0, Window::Dimension(0, end_position, pixels_per_item));
- win.set(1, Window::Dimension(0, _input->info()->dimension(1)));
-
- update_window_and_padding(win, AccessWindowHorizontal(input->info(), 0, pixels_per_item));
-
- ICLKernel::configure_internal(win);
-
- // Set config_id for enabling LWS tuning
- _config_id = kernel_name;
- _config_id += "_";
- _config_id += lower_string(string_from_data_type(input->info()->data_type()));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(1));
-}
-
-void CLHistogramKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window);
-
- // TODO (COMPMID-679): Add CLMemFill
- _output->map(queue, true);
- ARM_COMPUTE_ERROR_ON(_output->buffer() == nullptr);
- memset(_output->buffer(), 0, _output->size());
- _output->unmap(queue);
-
- if(_input->info()->dimension(0) < pixels_per_item)
- {
- return;
- }
-
- Window slice = window.first_slice_window_2D();
- const unsigned int gws_x = (window.x().end() - window.x().start()) / window.x().step();
- cl::NDRange lws = (local_x_size < gws_x) ? cl::NDRange(local_x_size, 1) : cl::NDRange(1, 1);
-
- do
- {
- /* Run the core part which has width can be divided by 16 */
- unsigned int idx = 0;
- add_2D_tensor_argument(idx, _input, slice);
-
- enqueue(queue, *this, slice, lws);
- }
- while(window.slide_window_slice_2D(slice));
-}
-
-CLHistogramBorderKernel::CLHistogramBorderKernel()
- : _input(nullptr), _output(nullptr)
-{
-}
-
-void CLHistogramBorderKernel::configure(const ICLImage *input, ICLDistribution1D *output)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output);
-}
-
-void CLHistogramBorderKernel::configure(const CLCompileContext &compile_context, const ICLImage *input, ICLDistribution1D *output)
-{
- ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input);
- ARM_COMPUTE_ERROR_ON(nullptr == output);
-
- // Check input size
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
-
- // Check offset
- ARM_COMPUTE_ERROR_ON_MSG(0 > output->offset() || output->offset() > 256, "Offset is larger than the image value range.");
-
- // Check range
- ARM_COMPUTE_ERROR_ON_MSG(output->range() > 256 /* max range */, "Range larger than the image value range.");
-
- // We only run histogram on Image, therefore only 2 dimensions here
- unsigned int start_position = (input->info()->dimension(0) / pixels_per_item) * pixels_per_item;
-
- if(start_position >= input->info()->dimension(0))
- {
- return; // no need to run histogram border kernel
- }
-
- _input = input;
- _output = output;
-
- unsigned int num_bins = _output->num_bins();
- unsigned int window_size = _output->window();
- unsigned int offset = _output->offset();
- unsigned int range = _output->range();
- unsigned int offrange = offset + range;
-
- // Create kernel
- bool is_fixed_size = (256 == num_bins) && (1 == window_size) && (0 == offset) && (256 == offrange);
- const std::string kernel_name = is_fixed_size ? "hist_border_kernel_fixed" : "hist_border_kernel";
- _kernel = create_kernel(compile_context, kernel_name);
-
- // Set static kernel arguments
- unsigned int idx = num_arguments_per_2D_tensor(); //Skip the input and output parameters
- _kernel.setArg(idx++, _output->cl_buffer());
- if(!is_fixed_size)
- {
- _kernel.setArg<cl_uint>(idx++, num_bins);
- _kernel.setArg<cl_uint>(idx++, offset);
- _kernel.setArg<cl_uint>(idx++, range);
- _kernel.setArg<cl_uint>(idx++, offrange);
- }
-
- // Configure kernel window
- Window win;
- win.set(0, Window::Dimension(start_position, _input->info()->dimension(0)));
- win.set(1, Window::Dimension(0, _input->info()->dimension(1)));
- update_window_and_padding(win, AccessWindowHorizontal(input->info(), 0, 1));
- ICLKernel::configure_internal(win);
-
- // Set config_id for enabling LWS tuning
- _config_id = kernel_name;
- _config_id += "_";
- _config_id += lower_string(string_from_data_type(input->info()->data_type()));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(1));
-}
-
-void CLHistogramBorderKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- if(window.x().start() >= window.x().end())
- {
- return;
- }
-
- ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window);
-
- cl::NDRange lws = cl::NDRange(1, 1);
-
- Window slice = window.first_slice_window_2D();
-
- do
- {
- /* Run the border part which has width cannot be divided by 16 */
- unsigned int idx = 0;
- add_2D_tensor_argument(idx, _input, slice);
-
- enqueue(queue, *this, slice, lws);
- }
- while(window.slide_window_slice_2D(slice));
-}
diff --git a/src/core/CL/kernels/CLHistogramKernel.h b/src/core/CL/kernels/CLHistogramKernel.h
deleted file mode 100644
index 9c97c6590d..0000000000
--- a/src/core/CL/kernels/CLHistogramKernel.h
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLHISTOGRAMKERNEL_H
-#define ARM_COMPUTE_CLHISTOGRAMKERNEL_H
-
-#include "src/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLDistribution1D;
-class ICLTensor;
-using ICLImage = ICLTensor;
-
-/** Interface to run the histogram kernel. This kernel processes the part of image with width can be divided by 16.
- * If the image width is not a multiple of 16, remaining pixels have to be processed with the @ref CLHistogramBorderKernel
- */
-class CLHistogramKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLHistogramKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLHistogramKernel(const CLHistogramKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLHistogramKernel &operator=(const CLHistogramKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLHistogramKernel(CLHistogramKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLHistogramKernel &operator=(CLHistogramKernel &&) = default;
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] input Source image. Data types supported: U8.
- * @param[out] output Destination distribution.
- */
- void configure(const ICLImage *input, ICLDistribution1D *output);
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source image. Data types supported: U8.
- * @param[out] output Destination distribution.
- */
- void configure(const CLCompileContext &compile_context, const ICLImage *input, ICLDistribution1D *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLImage *_input;
- ICLDistribution1D *_output;
-};
-
-/** Interface to run the histogram kernel to handle the leftover part of image
- *
- */
-class CLHistogramBorderKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLHistogramBorderKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLHistogramBorderKernel(const CLHistogramBorderKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLHistogramBorderKernel &operator=(const CLHistogramBorderKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLHistogramBorderKernel(CLHistogramBorderKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLHistogramBorderKernel &operator=(CLHistogramBorderKernel &&) = default;
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] input Source image. Data types supported: U8.
- * @param[out] output Destination distribution.
- */
- void configure(const ICLImage *input, ICLDistribution1D *output);
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source image. Data types supported: U8.
- * @param[out] output Destination distribution.
- */
- void configure(const CLCompileContext &compile_context, const ICLImage *input, ICLDistribution1D *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLImage *_input;
- ICLDistribution1D *_output;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLHISTOGRAMKERNEL_H*/
diff --git a/src/core/CL/kernels/CLIntegralImageKernel.cpp b/src/core/CL/kernels/CLIntegralImageKernel.cpp
deleted file mode 100644
index 5e5683d231..0000000000
--- a/src/core/CL/kernels/CLIntegralImageKernel.cpp
+++ /dev/null
@@ -1,146 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/core/CL/kernels/CLIntegralImageKernel.h"
-
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Validate.h"
-#include "src/core/helpers/WindowHelpers.h"
-#include "support/StringSupport.h"
-
-#include <cstddef>
-
-using namespace arm_compute;
-
-void CLIntegralImageHorKernel::configure(const ICLTensor *input, ICLTensor *output)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output);
-}
-
-void CLIntegralImageHorKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U32);
-
- _input = input;
- _output = output;
-
- // Create kernel
- const std::string kernel_name = std::string("integral_horizontal");
- _kernel = create_kernel(compile_context, kernel_name);
-
- // Configure kernel window
- const unsigned int num_elems_processed_per_iteration = input->info()->dimension(0);
- const unsigned int num_elems_accessed_per_iteration = ceil_to_multiple(num_elems_processed_per_iteration, 16);
-
- Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_accessed_per_iteration);
-
- update_window_and_padding(win,
- AccessWindowHorizontal(input->info(), 0, num_elems_accessed_per_iteration),
- output_access);
-
- output_access.set_valid_region(win, input->info()->valid_region());
-
- ICLKernel::configure_internal(win);
-
- // Set config_id for enabling LWS tuning
- _config_id = kernel_name;
- _config_id += "_";
- _config_id += lower_string(string_from_data_type(input->info()->data_type()));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(1));
- _config_id += "_";
- _config_id += support::cpp11::to_string(output->info()->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(output->info()->dimension(1));
-}
-
-CLIntegralImageVertKernel::CLIntegralImageVertKernel()
- : _in_out(nullptr)
-{
-}
-
-void CLIntegralImageVertKernel::configure(ICLTensor *in_out)
-{
- configure(CLKernelLibrary::get().get_compile_context(), in_out);
-}
-
-void CLIntegralImageVertKernel::configure(const CLCompileContext &compile_context, ICLTensor *in_out)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(in_out, 1, DataType::U32);
-
- _in_out = in_out;
-
- // Create kernel
- const std::string kernel_name = std::string("integral_vertical");
- _kernel = create_kernel(compile_context, kernel_name);
-
- // Configure kernel window
- constexpr unsigned int num_elems_processed_per_iteration_x = 8;
- const unsigned int num_elems_processed_per_iteration_y = in_out->info()->dimension(Window::DimY);
-
- Window win = calculate_max_window(*in_out->info(), Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y));
-
- AccessWindowRectangle in_out_access(in_out->info(), 0, 0, num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y);
-
- update_window_and_padding(win, in_out_access);
-
- in_out_access.set_valid_region(win, in_out->info()->valid_region());
-
- ICLKernel::configure_internal(win);
-
- // Set config_id for enabling LWS tuning
- _config_id = kernel_name;
- _config_id += "_";
- _config_id += lower_string(string_from_data_type(in_out->info()->data_type()));
- _config_id += "_";
- _config_id += support::cpp11::to_string(in_out->info()->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(in_out->info()->dimension(1));
-}
-
-void CLIntegralImageVertKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- const size_t height = _in_out->info()->dimension(1);
-
- Window slice = window.first_slice_window_2D();
-
- do
- {
- unsigned int idx = 0;
- add_2D_tensor_argument(idx, _in_out, slice);
- _kernel.setArg<cl_uint>(idx++, height);
- enqueue(queue, *this, slice, lws_hint());
- }
- while(window.slide_window_slice_2D(slice));
-}
diff --git a/src/core/CL/kernels/CLIntegralImageKernel.h b/src/core/CL/kernels/CLIntegralImageKernel.h
deleted file mode 100644
index 0e40e3afbc..0000000000
--- a/src/core/CL/kernels/CLIntegralImageKernel.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLINTEGRALIMAGEKERNEL_H
-#define ARM_COMPUTE_CLINTEGRALIMAGEKERNEL_H
-
-#include "src/core/CL/ICLKernel.h"
-#include "src/core/CL/ICLSimple2DKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface to run the horizontal pass of the integral image kernel. */
-class CLIntegralImageHorKernel : public ICLSimple2DKernel
-{
-public:
- /** Initialise the kernel's input and output.
- *
- * @param[in] input An input tensor. Data types supported: U8
- * @param[out] output Destination tensor, Data types supported: U32.
- */
- void configure(const ICLTensor *input, ICLTensor *output);
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input An input tensor. Data types supported: U8
- * @param[out] output Destination tensor, Data types supported: U32.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
-};
-
-/** Interface to run the vertical pass of the integral image kernel. */
-class CLIntegralImageVertKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLIntegralImageVertKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLIntegralImageVertKernel(const CLIntegralImageVertKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLIntegralImageVertKernel &operator=(const CLIntegralImageVertKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLIntegralImageVertKernel(CLIntegralImageVertKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLIntegralImageVertKernel &operator=(CLIntegralImageVertKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in,out] in_out The input/output tensor. Data types supported: U32
- */
- void configure(ICLTensor *in_out);
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in,out] in_out The input/output tensor. Data types supported: U32
- */
- void configure(const CLCompileContext &compile_context, ICLTensor *in_out);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- ICLTensor *_in_out;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLINTEGRALIMAGEKERNEL_H */
diff --git a/src/core/CL/kernels/CLMagnitudePhaseKernel.cpp b/src/core/CL/kernels/CLMagnitudePhaseKernel.cpp
deleted file mode 100644
index 9845dd6169..0000000000
--- a/src/core/CL/kernels/CLMagnitudePhaseKernel.cpp
+++ /dev/null
@@ -1,176 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/core/CL/kernels/CLMagnitudePhaseKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Validate.h"
-#include "src/core/helpers/WindowHelpers.h"
-#include "support/StringSupport.h"
-
-#include <set>
-#include <string>
-
-using namespace arm_compute;
-
-CLMagnitudePhaseKernel::CLMagnitudePhaseKernel()
- : _gx(nullptr), _gy(nullptr), _magnitude(nullptr), _phase(nullptr), _run_mag(false), _run_phase(false)
-{
-}
-
-void CLMagnitudePhaseKernel::configure(const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase,
- MagnitudeType mag_type, PhaseType phase_type)
-{
- configure(CLKernelLibrary::get().get_compile_context(), gx, gy, magnitude, phase, mag_type, phase_type);
-}
-
-void CLMagnitudePhaseKernel::configure(const CLCompileContext &compile_context, const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase,
- MagnitudeType mag_type, PhaseType phase_type)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(gx, 1, DataType::S16, DataType::S32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(gy, 1, DataType::S16, DataType::S32);
- ARM_COMPUTE_ERROR_ON((magnitude == nullptr) && (phase == nullptr));
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(gx, gy);
-
- _run_mag = (magnitude != nullptr);
- _run_phase = (phase != nullptr);
- if(_run_mag)
- {
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(magnitude, 1, DataType::S16, DataType::S32);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(gx, magnitude);
- }
- if(_run_phase)
- {
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(phase, 1, DataType::U8);
- }
-
- if(!_run_mag && !_run_phase)
- {
- ARM_COMPUTE_ERROR("At least one output must be NOT NULL");
- }
-
- _gx = gx;
- _gy = gy;
- _magnitude = magnitude;
- _phase = phase;
-
- // Construct kernel name
- std::set<std::string> build_opts = {};
-
- // Add magnitude type
- if(_run_mag)
- {
- switch(mag_type)
- {
- case MagnitudeType::L1NORM:
- build_opts.insert("-DMAGNITUDE=1");
- break;
- case MagnitudeType::L2NORM:
- build_opts.insert("-DMAGNITUDE=2");
- break;
- default:
- ARM_COMPUTE_ERROR("Unsupported magnitude calculation type.");
- build_opts.insert("-DMAGNITUDE=0");
- break;
- }
- }
-
- // Add phase type
- if(_run_phase)
- {
- switch(phase_type)
- {
- case PhaseType::UNSIGNED:
- build_opts.insert("-DPHASE=1");
- break;
- case PhaseType::SIGNED:
- build_opts.insert("-DPHASE=2");
- break;
- default:
- ARM_COMPUTE_ERROR("Unsupported phase calculation type.");
- build_opts.insert("-DPHASE=0");
- break;
- }
- }
-
- // Add data_type
- build_opts.insert("-DDATA_TYPE=" + get_cl_type_from_data_type(gx->info()->data_type()));
-
- // Create kernel
- const std::string kernel_name = std::string("magnitude_phase");
- _kernel = create_kernel(compile_context, kernel_name, build_opts);
-
- // Configure kernel window
- constexpr unsigned int num_elems_processed_per_iteration = 16;
-
- Window win = calculate_max_window(*gx->info(), Steps(num_elems_processed_per_iteration));
-
- AccessWindowHorizontal gx_access(gx->info(), 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal gy_access(gy->info(), 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal output_magnitude_access(magnitude == nullptr ? nullptr : magnitude->info(), 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal output_phase_access(phase == nullptr ? nullptr : phase->info(), 0, num_elems_processed_per_iteration);
-
- update_window_and_padding(win,
- gx_access, gy_access,
- output_magnitude_access, output_phase_access);
-
- ValidRegion valid_region = intersect_valid_regions(gx->info()->valid_region(),
- gy->info()->valid_region());
- output_magnitude_access.set_valid_region(win, valid_region);
- output_phase_access.set_valid_region(win, valid_region);
-
- ICLKernel::configure_internal(win);
-
- // Set config_id for enabling LWS tuning
- _config_id = kernel_name;
- _config_id += "_";
- _config_id += lower_string(string_from_data_type(gx->info()->data_type()));
- _config_id += "_";
- _config_id += support::cpp11::to_string(gx->info()->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(gx->info()->dimension(1));
-}
-
-void CLMagnitudePhaseKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- Window slice = window.first_slice_window_2D();
- do
- {
- unsigned int idx = 0;
- add_2D_tensor_argument(idx, _gx, slice);
- add_2D_tensor_argument(idx, _gy, slice);
- add_2D_tensor_argument_if((_run_mag), idx, _magnitude, slice);
- add_2D_tensor_argument_if((_run_phase), idx, _phase, slice);
-
- enqueue(queue, *this, slice, lws_hint());
- }
- while(window.slide_window_slice_2D(slice));
-}
diff --git a/src/core/CL/kernels/CLMagnitudePhaseKernel.h b/src/core/CL/kernels/CLMagnitudePhaseKernel.h
deleted file mode 100644
index 514036b2ff..0000000000
--- a/src/core/CL/kernels/CLMagnitudePhaseKernel.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLMAGNITUDEPHASEKERNEL_H
-#define ARM_COMPUTE_CLMAGNITUDEPHASEKERNEL_H
-
-#include "arm_compute/core/Types.h"
-#include "src/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Template interface for the kernel to compute magnitude and phase.
- *
- */
-class CLMagnitudePhaseKernel : public ICLKernel
-{
-public:
- /** Default constructor. */
- CLMagnitudePhaseKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLMagnitudePhaseKernel(const CLMagnitudePhaseKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLMagnitudePhaseKernel &operator=(const CLMagnitudePhaseKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLMagnitudePhaseKernel(CLMagnitudePhaseKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLMagnitudePhaseKernel &operator=(CLMagnitudePhaseKernel &&) = default;
- /** Initialise the kernel's input, output.
- *
- * @note At least one of output1 or output2 must be set.
- *
- * @param[in] gx The input gradient X tensor. Data types supported: S16/S32.
- * @param[in] gy The input gradient Y tensor. Data types supported: S16/S32.
- * @param[out] magnitude (Optional) The output tensor - Magnitude. Data types supported: S16/S32.
- * @param[out] phase (Optional) The output tensor - Phase. Data types supported: U8.
- * @param[in] mag_type (Optional) Magnitude calculation type. Default: L2NORM.
- * @param[in] phase_type (Optional) Phase calculation type. Default: SIGNED.
- */
- void configure(const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase,
- MagnitudeType mag_type = MagnitudeType::L2NORM, PhaseType phase_type = PhaseType::SIGNED);
- /** Initialise the kernel's input, output.
- *
- * @note At least one of output1 or output2 must be set.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] gx The input gradient X tensor. Data types supported: S16/S32.
- * @param[in] gy The input gradient Y tensor. Data types supported: S16/S32.
- * @param[out] magnitude (Optional) The output tensor - Magnitude. Data types supported: S16/S32.
- * @param[out] phase (Optional) The output tensor - Phase. Data types supported: U8.
- * @param[in] mag_type (Optional) Magnitude calculation type. Default: L2NORM.
- * @param[in] phase_type (Optional) Phase calculation type. Default: SIGNED.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase,
- MagnitudeType mag_type = MagnitudeType::L2NORM, PhaseType phase_type = PhaseType::SIGNED);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_gx; /**< Input gradient X. */
- const ICLTensor *_gy; /**< Input gradient Y. */
- ICLTensor *_magnitude; /**< Output - Magnitude. */
- ICLTensor *_phase; /**< Output - Phase. */
- bool _run_mag; /**< Calculate magnitude ? */
- bool _run_phase; /**< Calculate phase ? */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLMAGNITUDEPHASEKERNEL_H */
diff --git a/src/core/CL/kernels/CLMeanStdDevKernel.cpp b/src/core/CL/kernels/CLMeanStdDevKernel.cpp
deleted file mode 100644
index aed6e6eaf7..0000000000
--- a/src/core/CL/kernels/CLMeanStdDevKernel.cpp
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/core/CL/kernels/CLMeanStdDevKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "src/core/CL/CLValidate.h"
-#include "src/core/helpers/WindowHelpers.h"
-
-#include <cmath>
-#include <set>
-#include <string>
-
-using namespace arm_compute;
-
-CLMeanStdDevKernel::CLMeanStdDevKernel()
- : _input(nullptr), _mean(nullptr), _stddev(nullptr), _global_sum(nullptr), _global_sum_squared(nullptr), _border_size(0)
-{
-}
-
-BorderSize CLMeanStdDevKernel::border_size() const
-{
- return _border_size;
-}
-
-Status CLMeanStdDevKernel::validate(const ITensorInfo *input, float *mean, cl::Buffer *global_sum, float *stddev, cl::Buffer *global_sum_squared)
-{
- ARM_COMPUTE_UNUSED(mean);
- ARM_COMPUTE_UNUSED(stddev);
- ARM_COMPUTE_UNUSED(global_sum);
- ARM_COMPUTE_UNUSED(global_sum_squared);
- ARM_COMPUTE_RETURN_ERROR_ON_INT64_BASE_ATOMICS_UNSUPPORTED();
- ARM_COMPUTE_RETURN_ERROR_ON_TENSOR_NOT_2D(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
-
- return Status{};
-}
-
-void CLMeanStdDevKernel::configure(const ICLImage *input, float *mean, cl::Buffer *global_sum, float *stddev, cl::Buffer *global_sum_squared)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, mean, global_sum, stddev, global_sum_squared);
-}
-
-void CLMeanStdDevKernel::configure(const CLCompileContext &compile_context, const ICLImage *input, float *mean, cl::Buffer *global_sum, float *stddev, cl::Buffer *global_sum_squared)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, mean, global_sum);
- ARM_COMPUTE_ERROR_ON(stddev && nullptr == global_sum_squared);
- ARM_COMPUTE_ERROR_THROW_ON(CLMeanStdDevKernel::validate(input->info(), mean, global_sum, stddev, global_sum_squared));
-
- _input = input;
- _mean = mean;
- _stddev = stddev;
- _global_sum = global_sum;
- _global_sum_squared = global_sum_squared;
-
- // Create kernel
- std::set<std::string> build_opts;
-
- if(_stddev != nullptr)
- {
- build_opts.insert("-DSTDDEV");
- }
-
- _kernel = create_kernel(compile_context, "mean_stddev_accumulate", build_opts);
-
- // Set fixed arguments
- unsigned int idx = num_arguments_per_2D_tensor(); //Skip the input parameters
-
- _kernel.setArg(idx++, static_cast<cl_uint>(input->info()->dimension(1)));
- _kernel.setArg(idx++, *_global_sum);
-
- if(_stddev != nullptr)
- {
- _kernel.setArg(idx++, *_global_sum_squared);
- }
-
- // Configure kernel window
- constexpr unsigned int num_elems_processed_per_iteration_x = 8;
- const unsigned int num_elems_processed_per_iteration_y = input->info()->dimension(1);
-
- _border_size = BorderSize(ceil_to_multiple(input->info()->dimension(0), num_elems_processed_per_iteration_x) - input->info()->dimension(0));
-
- Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y));
- AccessWindowRectangle input_access(input->info(), 0, 0, num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y);
- update_window_and_padding(win, input_access);
-
- ICLKernel::configure_internal(win);
-}
-
-void CLMeanStdDevKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- // Clear sums
- static const cl_ulong zero = 0;
- queue.enqueueWriteBuffer(*_global_sum, CL_FALSE, 0, sizeof(cl_ulong), &zero);
-
- if(_stddev != nullptr)
- {
- queue.enqueueWriteBuffer(*_global_sum_squared, CL_FALSE, 0, sizeof(cl_ulong), &zero);
- }
-
- Window slice = window.first_slice_window_2D();
-
- do
- {
- unsigned int idx = 0;
- add_2D_tensor_argument(idx, _input, slice);
- // Set slice step equal to height to force gws[1] to 1,
- // as each thread calculates the sum across all rows and columns equal to the number of elements processed by each work-item
- slice.set_dimension_step(Window::DimY, _input->info()->dimension(1));
- enqueue(queue, *this, slice, lws_hint());
- }
- while(window.slide_window_slice_2D(slice));
-
- // Calculate mean and stddev
- cl_ulong global_sum = 0;
- cl_ulong global_sum_squared = 0;
- const float num_pixels = _input->info()->dimension(0) * _input->info()->dimension(1);
-
- queue.enqueueReadBuffer(*_global_sum, CL_TRUE, 0, sizeof(cl_ulong), static_cast<void *>(&global_sum));
- const float mean = global_sum / num_pixels;
- *_mean = mean;
-
- if(_stddev != nullptr)
- {
- queue.enqueueReadBuffer(*_global_sum_squared, CL_TRUE, 0, sizeof(cl_ulong), static_cast<void *>(&global_sum_squared));
- *_stddev = std::sqrt((global_sum_squared / num_pixels) - (mean * mean));
- }
-}
diff --git a/src/core/CL/kernels/CLMeanStdDevKernel.h b/src/core/CL/kernels/CLMeanStdDevKernel.h
deleted file mode 100644
index 179a2025b7..0000000000
--- a/src/core/CL/kernels/CLMeanStdDevKernel.h
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLMEANSTDDEVKERNEL_H
-#define ARM_COMPUTE_CLMEANSTDDEVKERNEL_H
-
-#include "src/core/CL/ICLKernel.h"
-
-namespace cl
-{
-class Buffer;
-}
-
-namespace arm_compute
-{
-class ICLTensor;
-using ICLImage = ICLTensor;
-
-/** Interface for the kernel to calculate mean and standard deviation of input image pixels. */
-class CLMeanStdDevKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLMeanStdDevKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLMeanStdDevKernel(const CLMeanStdDevKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLMeanStdDevKernel &operator=(const CLMeanStdDevKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLMeanStdDevKernel(CLMeanStdDevKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLMeanStdDevKernel &operator=(CLMeanStdDevKernel &&) = default;
- /** Initialise the kernel's input and outputs.
- *
- * @param[in] input Input image. Data types supported: U8.
- * @param[out] mean Input average pixel value.
- * @param[out] global_sum Keeps global sum of pixel values (Buffer size: 1 cl_ulong).
- * @param[out] stddev (Optional) Output standard deviation of pixel values.
- * @param[out] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values (Buffer size: 1 cl_ulong).
- */
- void configure(const ICLImage *input, float *mean, cl::Buffer *global_sum, float *stddev = nullptr, cl::Buffer *global_sum_squared = nullptr);
- /** Initialise the kernel's input and outputs.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input image. Data types supported: U8.
- * @param[out] mean Input average pixel value.
- * @param[out] global_sum Keeps global sum of pixel values (Buffer size: 1 cl_ulong).
- * @param[out] stddev (Optional) Output standard deviation of pixel values.
- * @param[out] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values (Buffer size: 1 cl_ulong).
- */
- void configure(const CLCompileContext &compile_context, const ICLImage *input, float *mean, cl::Buffer *global_sum, float *stddev = nullptr, cl::Buffer *global_sum_squared = nullptr);
- /** Static function to check if given info will lead to a valid configuration of @ref CLMeanStdDevKernel.
- *
- * @param[in] input Input image info. Data types supported: U8.
- * @param[in] mean Input average pixel value.
- * @param[in] global_sum Keeps global sum of pixel values.
- * @param[in] stddev (Optional) Output standard deviation of pixel values.
- * @param[in] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, float *mean, cl::Buffer *global_sum, float *stddev = nullptr, cl::Buffer *global_sum_squared = nullptr);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
- BorderSize border_size() const override;
-
-private:
- const ICLImage *_input;
- float *_mean;
- float *_stddev;
- cl::Buffer *_global_sum;
- cl::Buffer *_global_sum_squared;
- BorderSize _border_size;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLMEANSTDDEVKERNEL_H */
diff --git a/src/core/CL/kernels/CLMedian3x3Kernel.cpp b/src/core/CL/kernels/CLMedian3x3Kernel.cpp
deleted file mode 100644
index 23a21d6b19..0000000000
--- a/src/core/CL/kernels/CLMedian3x3Kernel.cpp
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/core/CL/kernels/CLMedian3x3Kernel.h"
-
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Validate.h"
-#include "src/core/helpers/WindowHelpers.h"
-#include "support/StringSupport.h"
-
-using namespace arm_compute;
-
-BorderSize CLMedian3x3Kernel::border_size() const
-{
- return BorderSize(1);
-}
-
-void CLMedian3x3Kernel::configure(const ICLTensor *input, ICLTensor *output, bool border_undefined)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output, border_undefined);
-}
-
-void CLMedian3x3Kernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
-
- _input = input;
- _output = output;
-
- // Create kernel
- const std::string kernel_name = std::string("non_linear_filter_box3x3");
- _kernel = create_kernel(compile_context, kernel_name, { "-DMEDIAN" });
-
- // Configure kernel window
- constexpr unsigned int num_elems_processed_per_iteration = 8;
- constexpr unsigned int num_elems_read_per_iteration = 16;
- constexpr unsigned int num_elems_written_per_iteration = 8;
- constexpr unsigned int num_rows_read_per_iteration = 3;
-
- Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size());
-
- AccessWindowRectangle input_access(input->info(), -border_size().left, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration);
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration);
-
- update_window_and_padding(win, input_access, output_access);
-
- output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
-
- ICLKernel::configure_internal(win);
-
- // Set config_id for enabling LWS tuning
- _config_id = kernel_name;
- _config_id += "_";
- _config_id += lower_string(string_from_data_type(input->info()->data_type()));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(1));
- _config_id += "_";
- _config_id += support::cpp11::to_string(output->info()->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(output->info()->dimension(1));
- _config_id += "_";
- _config_id += support::cpp11::to_string(border_undefined);
-}
diff --git a/src/core/CL/kernels/CLMedian3x3Kernel.h b/src/core/CL/kernels/CLMedian3x3Kernel.h
deleted file mode 100644
index 8cc5ed7279..0000000000
--- a/src/core/CL/kernels/CLMedian3x3Kernel.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLMEDIAN3X3KERNEL_H
-#define ARM_COMPUTE_CLMEDIAN3X3KERNEL_H
-
-#include "src/core/CL/ICLSimple2DKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the median 3x3 filter kernel.
- *
- */
-class CLMedian3x3Kernel : public ICLSimple2DKernel
-{
-public:
- /** Initialise the kernel's input and output.
- *
- * @param[in] input An input tensor. Data types supported: U8
- * @param[out] output The output tensor. Data types supported: U8.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input An input tensor. Data types supported: U8
- * @param[out] output The output tensor. Data types supported: U8.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined);
-
- // Inherited methods overridden:
- BorderSize border_size() const override;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLMEDIAN3X3KERNEL_H */
diff --git a/src/core/CL/kernels/CLMinMaxLocationKernel.cpp b/src/core/CL/kernels/CLMinMaxLocationKernel.cpp
deleted file mode 100644
index 675cfc19a9..0000000000
--- a/src/core/CL/kernels/CLMinMaxLocationKernel.cpp
+++ /dev/null
@@ -1,246 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/core/CL/kernels/CLMinMaxLocationKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Validate.h"
-#include "src/core/helpers/AutoConfiguration.h"
-#include "src/core/helpers/WindowHelpers.h"
-#include "support/StringSupport.h"
-
-#include <climits>
-
-namespace arm_compute
-{
-inline int32_t FloatFlip(float val)
-{
- static_assert(sizeof(float) == sizeof(int32_t), "Float must be same size as int32_t");
- int32_t int_val = 0;
-
- memcpy(&int_val, &val, sizeof(float));
- int_val = (int_val >= 0) ? int_val : int_val ^ 0x7FFFFFFF;
- return int_val;
-}
-
-inline float IFloatFlip(int32_t val)
-{
- static_assert(sizeof(float) == sizeof(int32_t), "Float must be same size as int32_t");
- float flt_val = 0.f;
-
- val = (val >= 0) ? val : val ^ 0x7FFFFFFF;
- memcpy(&flt_val, &val, sizeof(float));
- return flt_val;
-}
-
-CLMinMaxKernel::CLMinMaxKernel()
- : _input(nullptr), _min_max(), _data_type_max_min()
-{
-}
-
-void CLMinMaxKernel::configure(const ICLImage *input, cl::Buffer *min_max)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, min_max);
-}
-
-void CLMinMaxKernel::configure(const CLCompileContext &compile_context, const ICLImage *input, cl::Buffer *min_max)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16, DataType::F32);
- ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input);
- ARM_COMPUTE_ERROR_ON(min_max == nullptr);
-
- _input = input;
- _min_max = min_max;
- const unsigned int num_elems_processed_per_iteration = input->info()->dimension(0);
-
- switch(input->info()->data_type())
- {
- case DataType::U8:
- _data_type_max_min[0] = UCHAR_MAX;
- _data_type_max_min[1] = 0;
- break;
- case DataType::S16:
- _data_type_max_min[0] = SHRT_MAX;
- _data_type_max_min[1] = SHRT_MIN;
- break;
- case DataType::F32:
- _data_type_max_min[0] = FloatFlip(std::numeric_limits<float>::max());
- _data_type_max_min[1] = FloatFlip(std::numeric_limits<float>::lowest());
- break;
- default:
- ARM_COMPUTE_ERROR("You called with the wrong image data types");
- }
-
- // Set kernel build options
- std::set<std::string> build_opts{ "-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()) };
-
- if(num_elems_processed_per_iteration % max_cl_vector_width != 0)
- {
- build_opts.emplace("-DNON_MULTIPLE_OF_16");
- }
-
- if(input->info()->data_type() == DataType::F32)
- {
- build_opts.emplace("-DDATA_TYPE_MAX=" + support::cpp11::to_string(std::numeric_limits<float>::max()));
- build_opts.emplace("-DDATA_TYPE_MIN=" + support::cpp11::to_string(std::numeric_limits<float>::lowest()));
- build_opts.emplace("-DIS_DATA_TYPE_FLOAT");
- }
- else
- {
- build_opts.emplace("-DDATA_TYPE_MAX=" + support::cpp11::to_string(_data_type_max_min[0]));
- build_opts.emplace("-DDATA_TYPE_MIN=" + support::cpp11::to_string(_data_type_max_min[1]));
- }
-
- // Create kernel
- _kernel = create_kernel(compile_context, "minmax", build_opts);
-
- // Set fixed arguments
- unsigned int idx = num_arguments_per_2D_tensor(); //Skip the input and output parameters
- _kernel.setArg(idx++, *_min_max);
- _kernel.setArg<cl_int>(idx++, static_cast<cl_int>(input->info()->dimension(0)));
-
- // Configure kernel window
- Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
- update_window_and_padding(win, AccessWindowHorizontal(input->info(), 0, ceil_to_multiple(num_elems_processed_per_iteration, 16)));
- ICLKernel::configure_internal(win);
-}
-
-void CLMinMaxKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
-
- // Reset mininum and maximum values
- queue.enqueueWriteBuffer(*_min_max, CL_FALSE /* blocking */, 0, _data_type_max_min.size() * sizeof(int), _data_type_max_min.data());
-
- Window slice = window.first_slice_window_2D();
- do
- {
- unsigned int idx = 0;
- add_2D_tensor_argument(idx, _input, slice);
- enqueue(queue, *this, slice, lws_hint());
- }
- while(window.slide_window_slice_2D(slice));
-
- cl_int min = 0;
- cl_int max = 0;
- queue.enqueueReadBuffer(*_min_max, CL_TRUE /* blocking */, 0 * sizeof(cl_int), sizeof(cl_int), static_cast<int *>(&min));
- queue.enqueueReadBuffer(*_min_max, CL_TRUE /* blocking */, 1 * sizeof(cl_int), sizeof(cl_int), static_cast<int *>(&max));
-
- if(_input->info()->data_type() == DataType::F32)
- {
- std::array<float, 2> min_max =
- {
- {
- IFloatFlip(min),
- IFloatFlip(max)
- }
- };
- queue.enqueueWriteBuffer(*_min_max, CL_TRUE /* blocking */, 0, min_max.size() * sizeof(float), min_max.data());
- }
- else
- {
- std::array<int32_t, 2> min_max = { { min, max } };
- queue.enqueueWriteBuffer(*_min_max, CL_TRUE /* blocking */, 0, min_max.size() * sizeof(int32_t), min_max.data());
- }
-}
-
-CLMinMaxLocationKernel::CLMinMaxLocationKernel()
- : _input(nullptr), _min_max_count(nullptr)
-{
-}
-
-void CLMinMaxLocationKernel::configure(const ICLImage *input, cl::Buffer *min_max, cl::Buffer *min_max_count, ICLCoordinates2DArray *min_loc, ICLCoordinates2DArray *max_loc)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, min_max, min_max_count, min_loc, max_loc);
-}
-
-void CLMinMaxLocationKernel::configure(const CLCompileContext &compile_context, const ICLImage *input, cl::Buffer *min_max, cl::Buffer *min_max_count, ICLCoordinates2DArray *min_loc,
- ICLCoordinates2DArray *max_loc)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16, DataType::F32);
- ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input);
- ARM_COMPUTE_ERROR_ON(min_max == nullptr);
- ARM_COMPUTE_ERROR_ON(min_max_count == nullptr && min_loc == nullptr && max_loc == nullptr);
-
- _input = input;
- _min_max_count = min_max_count;
-
- // Set kernel build options
- std::set<std::string> build_opts;
- build_opts.emplace("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
- build_opts.emplace((min_max_count != nullptr) ? "-DCOUNT_MIN_MAX" : "");
- build_opts.emplace((min_loc != nullptr) ? "-DLOCATE_MIN" : "");
- build_opts.emplace((max_loc != nullptr) ? "-DLOCATE_MAX" : "");
- if(input->info()->data_type() == DataType::F32)
- {
- build_opts.emplace("-DIS_DATA_TYPE_FLOAT");
- }
-
- // Create kernel
- _kernel = create_kernel(compile_context, "minmaxloc", build_opts);
-
- // Set static arguments
- unsigned int idx = num_arguments_per_2D_tensor(); //Skip the input and output parameters
- _kernel.setArg(idx++, *min_max);
- _kernel.setArg(idx++, *min_max_count);
- if(min_loc != nullptr)
- {
- _kernel.setArg(idx++, min_loc->cl_buffer());
- _kernel.setArg<cl_uint>(idx++, min_loc->max_num_values());
- }
- if(max_loc != nullptr)
- {
- _kernel.setArg(idx++, max_loc->cl_buffer());
- _kernel.setArg<cl_uint>(idx++, max_loc->max_num_values());
- }
-
- // Configure kernel window
- constexpr unsigned int num_elems_processed_per_iteration = 1;
- Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
- update_window_and_padding(win, AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration));
- ICLKernel::configure_internal(win);
-}
-
-void CLMinMaxLocationKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
-
- static const unsigned int zero_count = 0;
- queue.enqueueWriteBuffer(*_min_max_count, CL_FALSE, 0 * sizeof(zero_count), sizeof(zero_count), &zero_count);
- queue.enqueueWriteBuffer(*_min_max_count, CL_FALSE, 1 * sizeof(zero_count), sizeof(zero_count), &zero_count);
-
- Window slice = window.first_slice_window_2D();
- do
- {
- unsigned int idx = 0;
- add_2D_tensor_argument(idx, _input, slice);
- enqueue(queue, *this, slice, lws_hint());
- }
- while(window.slide_window_slice_2D(slice));
-}
-} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLMinMaxLocationKernel.h b/src/core/CL/kernels/CLMinMaxLocationKernel.h
deleted file mode 100644
index 2196abe033..0000000000
--- a/src/core/CL/kernels/CLMinMaxLocationKernel.h
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLMINMAXLOCATIONKERNEL_H
-#define ARM_COMPUTE_CLMINMAXLOCATIONKERNEL_H
-
-#include "arm_compute/core/CL/ICLArray.h"
-#include "src/core/CL/ICLKernel.h"
-
-#include <array>
-
-namespace arm_compute
-{
-class ICLTensor;
-using ICLImage = ICLTensor;
-
-/** Interface for the kernel to perform min max search on an image.
- */
-class CLMinMaxKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLMinMaxKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLMinMaxKernel(const CLMinMaxKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLMinMaxKernel &operator=(const CLMinMaxKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLMinMaxKernel(CLMinMaxKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLMinMaxKernel &operator=(CLMinMaxKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input Image. Data types supported: U8/S16/F32.
- * @param[out] min_max Buffer of 2 elements to store the min value at position 0 and the max value at position 1. Data type supported: S32 if input type is U8/S16, F32 if input type is F32.
- */
- void configure(const ICLImage *input, cl::Buffer *min_max);
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input Image. Data types supported: U8/S16/F32.
- * @param[out] min_max Buffer of 2 elements to store the min value at position 0 and the max value at position 1. Data type supported: S32 if input type is U8/S16, F32 if input type is F32.
- */
- void configure(const CLCompileContext &compile_context, const ICLImage *input, cl::Buffer *min_max);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input; /**< Input image. */
- cl::Buffer *_min_max; /**< Minimum/maximum value. */
- std::array<int, 2> _data_type_max_min; /**< Maximum and minimum data type value respectively. */
-};
-
-/** Interface for the kernel to find min max locations of an image.
- */
-class CLMinMaxLocationKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLMinMaxLocationKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLMinMaxLocationKernel(const CLMinMaxLocationKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLMinMaxLocationKernel &operator=(const CLMinMaxLocationKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLMinMaxLocationKernel(CLMinMaxLocationKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLMinMaxLocationKernel &operator=(CLMinMaxLocationKernel &&) = default;
- /** Initialise the kernel's input and outputs.
- *
- * @note When locations of min and max occurrences are requested, the reported number of locations is limited to the given array size.
- *
- * @param[in] input Input image. Data types supported: U8/S16/F32.
- * @param[out] min_max Buffer of 2 elements to store the min value at position 0 and the max value at position 1. Data type supported: S32 if input type is U8/S16, F32 if input type is F32.
- * @param[out] min_max_count Buffer of 2 elements to store the min value occurrences at position 0 and the max value occurrences at position 1. Data type supported: S32
- * @param[out] min_loc (Optional) Array of Coordinates2D used to store minimum value locations.
- * @param[out] max_loc (Optional) Array of Coordinates2D used to store maximum value locations.
- */
- void configure(const ICLImage *input, cl::Buffer *min_max, cl::Buffer *min_max_count,
- ICLCoordinates2DArray *min_loc = nullptr, ICLCoordinates2DArray *max_loc = nullptr);
- /** Initialise the kernel's input and outputs.
- *
- * @note When locations of min and max occurrences are requested, the reported number of locations is limited to the given array size.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input image. Data types supported: U8/S16/F32.
- * @param[out] min_max Buffer of 2 elements to store the min value at position 0 and the max value at position 1. Data type supported: S32 if input type is U8/S16, F32 if input type is F32.
- * @param[out] min_max_count Buffer of 2 elements to store the min value occurrences at position 0 and the max value occurrences at position 1. Data type supported: S32
- * @param[out] min_loc (Optional) Array of Coordinates2D used to store minimum value locations.
- * @param[out] max_loc (Optional) Array of Coordinates2D used to store maximum value locations.
- */
- void configure(const CLCompileContext &compile_context, const ICLImage *input, cl::Buffer *min_max, cl::Buffer *min_max_count,
- ICLCoordinates2DArray *min_loc = nullptr, ICLCoordinates2DArray *max_loc = nullptr);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLImage *_input; /**< Input image. */
- cl::Buffer *_min_max_count; /**< Minimum/maximum value occurrences. */
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLMINMAXLOCATIONKERNEL_H */
diff --git a/src/core/CL/kernels/CLNonLinearFilterKernel.cpp b/src/core/CL/kernels/CLNonLinearFilterKernel.cpp
deleted file mode 100644
index c73acaf1d8..0000000000
--- a/src/core/CL/kernels/CLNonLinearFilterKernel.cpp
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/core/CL/kernels/CLNonLinearFilterKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-#include "src/core/helpers/WindowHelpers.h"
-
-#include <algorithm>
-#include <cmath>
-#include <cstdlib>
-#include <set>
-#include <sstream>
-#include <string>
-
-using namespace arm_compute;
-
-CLNonLinearFilterKernel::CLNonLinearFilterKernel()
- : _border_size(0)
-{
-}
-
-BorderSize CLNonLinearFilterKernel::border_size() const
-{
- return _border_size;
-}
-
-void CLNonLinearFilterKernel::configure(const ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function,
- unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask,
- bool border_undefined)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output, function, mask_size, pattern, mask, border_undefined);
-}
-
-void CLNonLinearFilterKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function,
- unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask,
- bool border_undefined)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON(mask_size != 3 && mask_size != 5);
- ARM_COMPUTE_ERROR_ON_MSG(pattern == MatrixPattern::OTHER, "MatrixPattern::OTHER is not supported!");
- ARM_COMPUTE_UNUSED(mask);
-
- _input = input;
- _output = output;
- _border_size = BorderSize(mask_size / 2);
-
- // Define build options
- std::set<std::string> build_opts;
- build_opts.emplace("-D" + string_from_non_linear_filter_function(function));
-
- // Define kernel
- std::string pattern_name = string_from_matrix_pattern(pattern);
- std::transform(pattern_name.begin(), pattern_name.end(), pattern_name.begin(), ::tolower);
- std::stringstream ss;
- ss << "non_linear_filter_" << pattern_name << mask_size << "x" << mask_size;
-
- // Create kernel
- _kernel = create_kernel(compile_context, ss.str(), build_opts);
-
- // Configure kernel window
- constexpr unsigned int num_elems_processed_per_iteration = 8;
- constexpr unsigned int num_elems_read_per_iteration = 16;
- const unsigned int num_rows_read_per_iteration = mask_size;
-
- Window win = calculate_max_window(*input->info(), num_elems_processed_per_iteration, border_undefined, border_size());
-
- AccessWindowRectangle input_access(input->info(), -border_size().left, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration);
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
-
- update_window_and_padding(win, input_access, output_access);
-
- output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
-
- ICLKernel::configure_internal(win);
-}
diff --git a/src/core/CL/kernels/CLNonLinearFilterKernel.h b/src/core/CL/kernels/CLNonLinearFilterKernel.h
deleted file mode 100644
index ed42063d2b..0000000000
--- a/src/core/CL/kernels/CLNonLinearFilterKernel.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLNONLINEARFILTERKERNEL_H
-#define ARM_COMPUTE_CLNONLINEARFILTERKERNEL_H
-
-#include "arm_compute/core/Types.h"
-#include "src/core/CL/ICLSimple2DKernel.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the kernel to apply a non-linear filter */
-class CLNonLinearFilterKernel : public ICLSimple2DKernel
-{
-public:
- /** Default constructor */
- CLNonLinearFilterKernel();
- /** Set the source, destination and border mode of the kernel
- *
- * @param[in] input Source tensor. Data types supported: U8
- * @param[out] output Destination tensor. Data types supported: U8
- * @param[in] function Non linear function to perform
- * @param[in] mask_size Mask size. Supported sizes: 3, 5
- * @param[in] pattern Mask pattern
- * @param[in] mask The given mask. Will be used only if pattern is specified to PATTERN_OTHER
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function,
- unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask,
- bool border_undefined);
- /** Set the source, destination and border mode of the kernel
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8
- * @param[out] output Destination tensor. Data types supported: U8
- * @param[in] function Non linear function to perform
- * @param[in] mask_size Mask size. Supported sizes: 3, 5
- * @param[in] pattern Mask pattern
- * @param[in] mask The given mask. Will be used only if pattern is specified to PATTERN_OTHER
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function,
- unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask,
- bool border_undefined);
-
- // Inherited methods overridden:
- BorderSize border_size() const override;
-
-private:
- BorderSize _border_size; /**< Border size */
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLNONLINEARFILTERKERNEL_H */
diff --git a/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp b/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp
deleted file mode 100644
index 7d5c5ba7e1..0000000000
--- a/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
-#include "src/core/helpers/WindowHelpers.h"
-
-#include <set>
-#include <string>
-
-using namespace arm_compute;
-
-BorderSize CLNonMaximaSuppression3x3Kernel::border_size() const
-{
- return BorderSize(1);
-}
-
-void CLNonMaximaSuppression3x3Kernel::configure(const ICLTensor *input, ICLTensor *output, bool border_undefined)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output, border_undefined);
-}
-
-void CLNonMaximaSuppression3x3Kernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::F32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::F32);
-
- _input = input;
- _output = output;
-
- // Create kernel
- std::set<std::string> build_opts = { ("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())) };
- _kernel = create_kernel(compile_context, "non_max_suppression", build_opts);
-
- // Configure kernel window
- constexpr unsigned int num_elems_processed_per_iteration = 8;
- constexpr unsigned int num_elems_written_per_iteration = 8;
- constexpr unsigned int num_elems_read_per_iteration = 16;
- constexpr unsigned int num_rows_read_per_iteration = 3;
-
- Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size());
-
- AccessWindowRectangle input_access(input->info(), -border_size().left, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration);
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration);
-
- update_window_and_padding(win, input_access, output_access);
-
- output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
-
- ICLKernel::configure_internal(win);
-}
diff --git a/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h b/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h
deleted file mode 100644
index d9ed60ce6b..0000000000
--- a/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLNONMAXIMASUPPRESSION3x3KERNEL_H
-#define ARM_COMPUTE_CLNONMAXIMASUPPRESSION3x3KERNEL_H
-
-#include "src/core/CL/ICLSimple2DKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface to perform Non-Maxima suppression over a 3x3 window using OpenCL
- *
- * @note Used by @ref CLFastCorners and @ref CLHarrisCorners
- */
-class CLNonMaximaSuppression3x3Kernel : public ICLSimple2DKernel
-{
-public:
- /** Initialise the kernel's sources, destinations and border mode.
- *
- * @param[in] input Source tensor. Data types supported: U8, F32. (Must be the same as the output tensor)
- * @param[out] output Destination tensor. Data types supported: U8, F32. (Must be the same as the input tensor)
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
- /** Initialise the kernel's sources, destinations and border mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8, F32. (Must be the same as the output tensor)
- * @param[out] output Destination tensor. Data types supported: U8, F32. (Must be the same as the input tensor)
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined);
-
- // Inherited methods overridden:
- BorderSize border_size() const override;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLNONMAXIMASUPPRESSION3x3KERNEL_H */
diff --git a/src/core/CL/kernels/CLScharr3x3Kernel.cpp b/src/core/CL/kernels/CLScharr3x3Kernel.cpp
deleted file mode 100644
index 7ceddc9626..0000000000
--- a/src/core/CL/kernels/CLScharr3x3Kernel.cpp
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/core/CL/kernels/CLScharr3x3Kernel.h"
-
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Validate.h"
-#include "src/core/helpers/WindowHelpers.h"
-
-#include <set>
-#include <string>
-
-using namespace arm_compute;
-
-CLScharr3x3Kernel::CLScharr3x3Kernel()
- : _run_scharr_x(false), _run_scharr_y(false), _input(nullptr), _output_x(nullptr), _output_y(nullptr)
-{
-}
-
-BorderSize CLScharr3x3Kernel::border_size() const
-{
- return BorderSize(1);
-}
-
-void CLScharr3x3Kernel::configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output_x, output_y, border_undefined);
-}
-
-void CLScharr3x3Kernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON((output_x == nullptr) && (output_y == nullptr));
-
- _run_scharr_x = output_x != nullptr;
- _run_scharr_y = output_y != nullptr;
-
- if(_run_scharr_x)
- {
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_x, 1, DataType::S16);
- }
-
- if(_run_scharr_y)
- {
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_y, 1, DataType::S16);
- }
-
- _input = input;
- _output_x = output_x;
- _output_y = output_y;
-
- // Set build options
- std::set<std::string> build_opts;
-
- if(_run_scharr_x)
- {
- build_opts.insert("-DGRAD_X");
- }
-
- if(_run_scharr_y)
- {
- build_opts.insert("-DGRAD_Y");
- }
-
- // Create kernel
- _kernel = create_kernel(compile_context, "scharr3x3", build_opts);
-
- // Configure kernel window
- constexpr unsigned int num_elems_processed_per_iteration = 8;
- constexpr unsigned int num_elems_read_per_iteration = 16;
- constexpr unsigned int num_elems_written_per_iteration = 8;
- constexpr unsigned int num_rows_read_per_iteration = 3;
-
- Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size());
-
- AccessWindowRectangle input_access(input->info(), -border_size().left, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration);
- AccessWindowHorizontal output_x_access(output_x == nullptr ? nullptr : output_x->info(), 0, num_elems_written_per_iteration);
- AccessWindowHorizontal output_y_access(output_y == nullptr ? nullptr : output_y->info(), 0, num_elems_written_per_iteration);
-
- update_window_and_padding(win, input_access, output_x_access, output_y_access);
-
- output_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
- output_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
-
- ICLKernel::configure_internal(win);
-}
-
-void CLScharr3x3Kernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- Window slice = window.first_slice_window_2D();
- do
- {
- unsigned int idx = 0;
- add_2D_tensor_argument(idx, _input, slice);
- add_2D_tensor_argument_if((_run_scharr_x), idx, _output_x, slice);
- add_2D_tensor_argument_if((_run_scharr_y), idx, _output_y, slice);
-
- enqueue(queue, *this, slice, lws_hint());
- }
- while(window.slide_window_slice_2D(slice));
-}
diff --git a/src/core/CL/kernels/CLScharr3x3Kernel.h b/src/core/CL/kernels/CLScharr3x3Kernel.h
deleted file mode 100644
index a670da5b6f..0000000000
--- a/src/core/CL/kernels/CLScharr3x3Kernel.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLSCHARR3X3KERNEL_H
-#define ARM_COMPUTE_CLSCHARR3X3KERNEL_H
-
-#include "src/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the kernel to run a 3x3 Scharr filter on a tensor.
- *
- * @f[
- * \mathbf{G}_x=\begin{vmatrix}
- * -3 & 0 & +3\\
- * -10& 0 & +10\\
- * -3 & 0 & +3
- * \end{vmatrix}
- * @f]
- * @f[
- * \mathbf{G}_y=\begin{vmatrix}
- * -3 & -10 & -3\\
- * 0 & 0 & 0\\
- * +3 & +10 & +3
- * \end{vmatrix}
- * @f]
- */
-class CLScharr3x3Kernel : public ICLKernel
-{
-public:
- /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */
- CLScharr3x3Kernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLScharr3x3Kernel(const CLScharr3x3Kernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLScharr3x3Kernel &operator=(const CLScharr3x3Kernel &) = delete;
- /** Allow instances of this class to be moved */
- CLScharr3x3Kernel(CLScharr3x3Kernel &&) = default;
- /** Allow instances of this class to be moved */
- CLScharr3x3Kernel &operator=(CLScharr3x3Kernel &&) = default;
- /** Initialise the kernel's source, destination and border.
- *
- * @note At least one of output_x or output_y must be set.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
- /** Initialise the kernel's source, destination and border.
- *
- * @note At least one of output_x or output_y must be set.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- bool _run_scharr_x; /**< Do we need to run Scharr X ? */
- bool _run_scharr_y; /**< Do we need to run Scharr Y ? */
- const ICLTensor *_input; /**< Input image */
- ICLTensor *_output_x; /**< Output image for scharr X */
- ICLTensor *_output_y; /**< Output image for scharr Y */
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLSCHARR3X3KERNEL_H */
diff --git a/src/core/CL/kernels/CLSobel3x3Kernel.cpp b/src/core/CL/kernels/CLSobel3x3Kernel.cpp
deleted file mode 100644
index a87677a38f..0000000000
--- a/src/core/CL/kernels/CLSobel3x3Kernel.cpp
+++ /dev/null
@@ -1,141 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/core/CL/kernels/CLSobel3x3Kernel.h"
-
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Validate.h"
-#include "src/core/helpers/WindowHelpers.h"
-#include "support/StringSupport.h"
-
-#include <set>
-#include <string>
-
-using namespace arm_compute;
-
-CLSobel3x3Kernel::CLSobel3x3Kernel()
- : _input(nullptr), _output_x(nullptr), _output_y(nullptr), _run_sobel_x(false), _run_sobel_y(false)
-{
-}
-
-BorderSize CLSobel3x3Kernel::border_size() const
-{
- return BorderSize(1);
-}
-
-void CLSobel3x3Kernel::configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output_x, output_y, border_undefined);
-}
-
-void CLSobel3x3Kernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON((output_x == nullptr) && (output_y == nullptr));
-
- _run_sobel_x = output_x != nullptr;
- _run_sobel_y = output_y != nullptr;
-
- if(_run_sobel_x)
- {
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_x, 1, DataType::S16);
- }
-
- if(_run_sobel_y)
- {
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_y, 1, DataType::S16);
- }
-
- _input = input;
- _output_x = output_x;
- _output_y = output_y;
-
- // Set build options
- std::set<std::string> build_opts;
-
- if(_run_sobel_x)
- {
- build_opts.insert("-DGRAD_X");
- }
-
- if(_run_sobel_y)
- {
- build_opts.insert("-DGRAD_Y");
- }
-
- // Create kernel
- const std::string kernel_name = std::string("sobel3x3");
- _kernel = create_kernel(compile_context, kernel_name, build_opts);
-
- // Configure kernel window
- constexpr unsigned int num_elems_processed_per_iteration = 8;
- constexpr unsigned int num_elems_read_per_iteration = 16;
- constexpr unsigned int num_elems_written_per_iteration = 8;
- constexpr unsigned int num_rows_read_per_iteration = 3;
-
- Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size());
-
- AccessWindowRectangle input_access(input->info(), -border_size().left, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration);
- AccessWindowHorizontal output_x_access(output_x == nullptr ? nullptr : output_x->info(), 0, num_elems_written_per_iteration);
- AccessWindowHorizontal output_y_access(output_y == nullptr ? nullptr : output_y->info(), 0, num_elems_written_per_iteration);
-
- update_window_and_padding(win, input_access, output_x_access, output_y_access);
-
- output_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
- output_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
-
- ICLKernel::configure_internal(win);
-
- // Set config_id for enabling LWS tuning
- _config_id = kernel_name;
- _config_id += "_";
- _config_id += lower_string(string_from_data_type(input->info()->data_type()));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(1));
- _config_id += "_";
- _config_id += support::cpp11::to_string(border_undefined);
-}
-
-void CLSobel3x3Kernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- Window slice = window.first_slice_window_2D();
-
- do
- {
- unsigned int idx = 0;
- add_2D_tensor_argument(idx, _input, slice);
- add_2D_tensor_argument_if((_run_sobel_x), idx, _output_x, slice);
- add_2D_tensor_argument_if((_run_sobel_y), idx, _output_y, slice);
-
- enqueue(queue, *this, slice, lws_hint());
- }
- while(window.slide_window_slice_2D(slice));
-}
diff --git a/src/core/CL/kernels/CLSobel3x3Kernel.h b/src/core/CL/kernels/CLSobel3x3Kernel.h
deleted file mode 100644
index fed8068762..0000000000
--- a/src/core/CL/kernels/CLSobel3x3Kernel.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLSOBEL3X3KERNEL_H
-#define ARM_COMPUTE_CLSOBEL3X3KERNEL_H
-
-#include "src/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the kernel to run a 3x3 Sobel filter on a tensor. */
-class CLSobel3x3Kernel : public ICLKernel
-{
-public:
- /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */
- CLSobel3x3Kernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLSobel3x3Kernel(const CLSobel3x3Kernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLSobel3x3Kernel &operator=(const CLSobel3x3Kernel &) = delete;
- /** Allow instances of this class to be moved */
- CLSobel3x3Kernel(CLSobel3x3Kernel &&) = default;
- /** Allow instances of this class to be moved */
- CLSobel3x3Kernel &operator=(CLSobel3x3Kernel &&) = default;
- /** Default destructor */
- ~CLSobel3x3Kernel() = default;
- /** Initialise the kernel's source, destination and border.
- *
- * @note At least one of output_x or output_y must be set.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
- /** Initialise the kernel's source, destination and border.
- *
- * @note At least one of output_x or output_y must be set.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- const ICLTensor *_input; /**< Input tensor */
- ICLTensor *_output_x; /**< Output tensor for Sobel X */
- ICLTensor *_output_y; /**< Output tensor for Sobel Y */
- bool _run_sobel_x; /**< Do we need to run Sobel X ? */
- bool _run_sobel_y; /**< Do we need to run Sobel Y ? */
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLSOBEL3X3KERNEL_H */
diff --git a/src/core/CL/kernels/CLSobel5x5Kernel.cpp b/src/core/CL/kernels/CLSobel5x5Kernel.cpp
deleted file mode 100644
index c450becd1d..0000000000
--- a/src/core/CL/kernels/CLSobel5x5Kernel.cpp
+++ /dev/null
@@ -1,251 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/core/CL/kernels/CLSobel5x5Kernel.h"
-
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Validate.h"
-#include "src/core/helpers/WindowHelpers.h"
-#include "support/StringSupport.h"
-
-#include <set>
-#include <string>
-
-using namespace arm_compute;
-
-CLSobel5x5HorKernel::CLSobel5x5HorKernel()
- : _input(nullptr), _output_x(nullptr), _output_y(nullptr), _run_sobel_x(false), _run_sobel_y(false), _border_size(0)
-{
-}
-
-BorderSize CLSobel5x5HorKernel::border_size() const
-{
- return _border_size;
-}
-
-void CLSobel5x5HorKernel::configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output_x, output_y, border_undefined);
-}
-
-void CLSobel5x5HorKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON((output_x == nullptr) && (output_y == nullptr));
-
- _run_sobel_x = output_x != nullptr;
- _run_sobel_y = output_y != nullptr;
-
- if(_run_sobel_x)
- {
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_x, 1, DataType::S16);
- }
-
- if(_run_sobel_y)
- {
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_y, 1, DataType::S16);
- }
-
- _input = input;
- _output_x = output_x;
- _output_y = output_y;
- _border_size = BorderSize(border_undefined ? 0 : 2, 2);
-
- // Set build options
- std::set<std::string> build_opts;
-
- if(_run_sobel_x)
- {
- build_opts.insert("-DGRAD_X");
- }
-
- if(_run_sobel_y)
- {
- build_opts.insert("-DGRAD_Y");
- }
-
- // Create kernel
- const std::string kernel_name = std::string("sobel_separable1x5");
- _kernel = create_kernel(compile_context, kernel_name, build_opts);
-
- // Configure kernel window
- constexpr unsigned int num_elems_processed_per_iteration = 8;
- constexpr unsigned int num_elems_read_per_iteration = 16;
- constexpr unsigned int num_elems_written_per_iteration = 8;
-
- Window win = calculate_max_window_horizontal(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size());
-
- AccessWindowHorizontal input_access(input->info(), -border_size().left, num_elems_read_per_iteration);
- AccessWindowHorizontal output_x_access(output_x == nullptr ? nullptr : output_x->info(), 0, num_elems_written_per_iteration);
- AccessWindowHorizontal output_y_access(output_y == nullptr ? nullptr : output_y->info(), 0, num_elems_written_per_iteration);
-
- update_window_and_padding(win, input_access, output_x_access, output_y_access);
-
- output_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
- output_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
-
- ICLKernel::configure_internal(win);
-
- // Set config_id for enabling LWS tuning
- _config_id = kernel_name;
- _config_id += "_";
- _config_id += lower_string(string_from_data_type(input->info()->data_type()));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(1));
- _config_id += "_";
- _config_id += support::cpp11::to_string(border_undefined);
-}
-
-void CLSobel5x5HorKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- Window slice = window.first_slice_window_2D();
- do
- {
- unsigned int idx = 0;
- add_2D_tensor_argument(idx, _input, slice);
- add_2D_tensor_argument_if((_run_sobel_x), idx, _output_x, slice);
- add_2D_tensor_argument_if((_run_sobel_y), idx, _output_y, slice);
-
- enqueue(queue, *this, slice, lws_hint());
- }
- while(window.slide_window_slice_2D(slice));
-}
-
-CLSobel5x5VertKernel::CLSobel5x5VertKernel()
- : _input_x(nullptr), _input_y(nullptr), _output_x(nullptr), _output_y(nullptr), _run_sobel_x(false), _run_sobel_y(false)
-{
-}
-
-BorderSize CLSobel5x5VertKernel::border_size() const
-{
- return BorderSize{ 2, 0 };
-}
-
-void CLSobel5x5VertKernel::configure(const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input_x, input_y, output_x, output_y, border_undefined);
-}
-
-void CLSobel5x5VertKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined)
-{
- ARM_COMPUTE_ERROR_ON((output_x == nullptr) && (output_y == nullptr));
-
- _run_sobel_x = output_x != nullptr;
- _run_sobel_y = output_y != nullptr;
-
- if(_run_sobel_x)
- {
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input_x, 1, DataType::S16);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_x, 1, DataType::S16);
- }
-
- if(_run_sobel_y)
- {
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input_y, 1, DataType::S16);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_y, 1, DataType::S16);
- }
-
- _input_x = input_x;
- _input_y = input_y;
- _output_x = output_x;
- _output_y = output_y;
-
- // Set build options
- std::set<std::string> build_opts;
-
- if(_run_sobel_x)
- {
- build_opts.insert("-DGRAD_X");
- }
-
- if(_run_sobel_y)
- {
- build_opts.insert("-DGRAD_Y");
- }
-
- // Create kernel
- const std::string kernel_name = std::string("sobel_separable5x1");
- _kernel = create_kernel(compile_context, kernel_name, build_opts);
-
- const ICLTensor *input = _run_sobel_x ? _input_x : _input_y;
-
- // Configure kernel window
- constexpr unsigned int num_elems_processed_per_iteration = 8;
- constexpr unsigned int num_elems_written_per_iteration = 8;
- constexpr unsigned int num_elems_read_per_iteration = 8;
- constexpr unsigned int num_rows_read_per_iteration = 5;
-
- Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size());
-
- AccessWindowRectangle input_x_access(input_x == nullptr ? nullptr : input_x->info(), 0, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration);
- AccessWindowRectangle input_y_access(input_y == nullptr ? nullptr : input_y->info(), 0, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration);
- AccessWindowHorizontal output_x_access(output_x == nullptr ? nullptr : output_x->info(), 0, num_elems_written_per_iteration);
- AccessWindowHorizontal output_y_access(output_y == nullptr ? nullptr : output_y->info(), 0, num_elems_written_per_iteration);
-
- update_window_and_padding(win, input_x_access, input_y_access, output_x_access, output_y_access);
-
- output_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
- output_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
-
- ICLKernel::configure_internal(win);
-
- // Set config_id for enabling LWS tuning
- _config_id = kernel_name;
- _config_id += "_";
- _config_id += lower_string(string_from_data_type(input->info()->data_type()));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(1));
- _config_id += "_";
- _config_id += support::cpp11::to_string(border_undefined);
-}
-
-void CLSobel5x5VertKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- Window slice = window.first_slice_window_2D();
- do
- {
- unsigned int idx = 0;
- add_2D_tensor_argument_if((_run_sobel_x), idx, _input_x, slice);
- add_2D_tensor_argument_if((_run_sobel_x), idx, _output_x, slice);
- add_2D_tensor_argument_if((_run_sobel_y), idx, _input_y, slice);
- add_2D_tensor_argument_if((_run_sobel_y), idx, _output_y, slice);
-
- _kernel.setArg(idx++, 0 /*dummy*/);
-
- enqueue(queue, *this, slice, lws_hint());
- }
- while(window.slide_window_slice_2D(slice));
-}
diff --git a/src/core/CL/kernels/CLSobel5x5Kernel.h b/src/core/CL/kernels/CLSobel5x5Kernel.h
deleted file mode 100644
index a163ac932a..0000000000
--- a/src/core/CL/kernels/CLSobel5x5Kernel.h
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLSOBEL5X5KERNEL_H
-#define ARM_COMPUTE_CLSOBEL5X5KERNEL_H
-
-#include "src/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the kernel to run the horizontal pass of 5x5 Sobel filter on a tensor. */
-class CLSobel5x5HorKernel : public ICLKernel
-{
-public:
- /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */
- CLSobel5x5HorKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLSobel5x5HorKernel(const CLSobel5x5HorKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLSobel5x5HorKernel &operator=(const CLSobel5x5HorKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLSobel5x5HorKernel(CLSobel5x5HorKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLSobel5x5HorKernel &operator=(CLSobel5x5HorKernel &&) = default;
- /** Default destructor */
- ~CLSobel5x5HorKernel() = default;
-
- /** Initialise the kernel's source, destination and border.
- *
- * @note At least one of output_x or output_y must be set.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
- /** Initialise the kernel's source, destination and border.
- *
- * @note At least one of output_x or output_y must be set.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- const ICLTensor *_input; /**< Input tensor */
- ICLTensor *_output_x; /**< X output of horizontal pass */
- ICLTensor *_output_y; /**< Y output of horizontal pass */
- bool _run_sobel_x; /**< Do we need to run Sobel X ? */
- bool _run_sobel_y; /**< Do we need to run Sobel Y ? */
- BorderSize _border_size; /**< Border size */
-};
-
-/** Interface for the kernel to run the vertical pass of 5x5 Sobel filter on a tensor. */
-class CLSobel5x5VertKernel : public ICLKernel
-{
-public:
- /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */
- CLSobel5x5VertKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLSobel5x5VertKernel(const CLSobel5x5VertKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLSobel5x5VertKernel &operator=(const CLSobel5x5VertKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLSobel5x5VertKernel(CLSobel5x5VertKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLSobel5x5VertKernel &operator=(CLSobel5x5VertKernel &&) = default;
- /** Default destructor */
- ~CLSobel5x5VertKernel() = default;
-
- /** Initialise the kernel's source, destination and border.
- *
- * @note At least one of output_x or output_y must be set and the corresponding input.
- *
- * @param[in] input_x (Optional) Input for X (X output of horizontal pass). Data types supported: S16.
- * @param[in] input_y (Optional) Input for Y (Y output of horizontal pass). Data types supported: S16.
- * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
- /** Initialise the kernel's source, destination and border.
- *
- * @note At least one of output_x or output_y must be set and the corresponding input.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input_x (Optional) Input for X (X output of horizontal pass). Data types supported: S16.
- * @param[in] input_y (Optional) Input for Y (Y output of horizontal pass). Data types supported: S16.
- * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- const ICLTensor *_input_x; /**< X input (X output of the horizontal pass) */
- const ICLTensor *_input_y; /**< Y input (Y output of the horizontal pass) */
- ICLTensor *_output_x; /**< X output of sobel */
- ICLTensor *_output_y; /**< Y output of sobel */
- bool _run_sobel_x; /**< Do we need to run sobel X? */
- bool _run_sobel_y; /**< Do we need to run sobel Y? */
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLSOBEL5X5KERNEL_H */
diff --git a/src/core/CL/kernels/CLSobel7x7Kernel.cpp b/src/core/CL/kernels/CLSobel7x7Kernel.cpp
deleted file mode 100644
index 1cfa74f7b3..0000000000
--- a/src/core/CL/kernels/CLSobel7x7Kernel.cpp
+++ /dev/null
@@ -1,255 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/core/CL/kernels/CLSobel7x7Kernel.h"
-
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Validate.h"
-#include "src/core/helpers/WindowHelpers.h"
-#include "support/StringSupport.h"
-
-#include <set>
-#include <string>
-
-using namespace arm_compute;
-
-CLSobel7x7HorKernel::CLSobel7x7HorKernel()
- : _input(nullptr), _output_x(nullptr), _output_y(nullptr), _run_sobel_x(false), _run_sobel_y(false), _border_size(0)
-{
-}
-
-BorderSize CLSobel7x7HorKernel::border_size() const
-{
- return _border_size;
-}
-
-void CLSobel7x7HorKernel::configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output_x, output_y, border_undefined);
-}
-
-void CLSobel7x7HorKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON((output_x == nullptr) && (output_y == nullptr));
-
- _run_sobel_x = output_x != nullptr;
- _run_sobel_y = output_y != nullptr;
-
- if(_run_sobel_x)
- {
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_x, 1, DataType::S32);
- }
-
- if(_run_sobel_y)
- {
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_y, 1, DataType::S32);
- }
-
- _input = input;
- _output_x = output_x;
- _output_y = output_y;
- _border_size = BorderSize(border_undefined ? 0 : 3, 3);
-
- // Construct kernel name
- const std::string kernel_name = "sobel_separable1x7";
-
- // Set build options
- std::set<std::string> build_opts;
-
- if(_run_sobel_x)
- {
- build_opts.insert("-DGRAD_X");
- }
-
- if(_run_sobel_y)
- {
- build_opts.insert("-DGRAD_Y");
- }
-
- // Create kernel
- _kernel = create_kernel(compile_context, kernel_name, build_opts);
-
- // Configure kernel window
- constexpr unsigned int num_elems_processed_per_iteration = 8;
- constexpr unsigned int num_elems_read_per_iteration = 16;
- constexpr unsigned int num_elems_written_per_iteration = 8;
-
- Window win = calculate_max_window_horizontal(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size());
-
- AccessWindowHorizontal input_access(input->info(), -border_size().left, num_elems_read_per_iteration);
- AccessWindowHorizontal output_x_access(output_x == nullptr ? nullptr : output_x->info(), 0, num_elems_written_per_iteration);
- AccessWindowHorizontal output_y_access(output_y == nullptr ? nullptr : output_y->info(), 0, num_elems_written_per_iteration);
-
- update_window_and_padding(win, input_access, output_x_access, output_y_access);
-
- output_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
- output_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
-
- ICLKernel::configure_internal(win);
-
- // Set config_id for enabling LWS tuning
- _config_id = kernel_name;
- _config_id += "_";
- _config_id += lower_string(string_from_data_type(input->info()->data_type()));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(1));
- _config_id += "_";
- _config_id += support::cpp11::to_string(border_undefined);
-}
-
-void CLSobel7x7HorKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- Window slice = window.first_slice_window_2D();
- do
- {
- unsigned int idx = 0;
- add_2D_tensor_argument(idx, _input, slice);
- add_2D_tensor_argument_if((_run_sobel_x), idx, _output_x, slice);
- add_2D_tensor_argument_if((_run_sobel_y), idx, _output_y, slice);
-
- enqueue(queue, *this, slice, lws_hint());
- }
- while(window.slide_window_slice_2D(slice));
-}
-
-CLSobel7x7VertKernel::CLSobel7x7VertKernel()
- : _input_x(nullptr), _input_y(nullptr), _output_x(nullptr), _output_y(nullptr), _run_sobel_x(false), _run_sobel_y(false)
-{
-}
-
-BorderSize CLSobel7x7VertKernel::border_size() const
-{
- return BorderSize{ 3, 0 };
-}
-
-void CLSobel7x7VertKernel::configure(const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input_x, input_y, output_x, output_y, border_undefined);
-}
-
-void CLSobel7x7VertKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined)
-{
- ARM_COMPUTE_ERROR_ON((output_x == nullptr) && (output_y == nullptr));
-
- _run_sobel_x = output_x != nullptr;
- _run_sobel_y = output_y != nullptr;
-
- if(_run_sobel_x)
- {
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input_x, 1, DataType::S32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_x, 1, DataType::S32);
- }
-
- if(_run_sobel_y)
- {
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input_y, 1, DataType::S32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_y, 1, DataType::S32);
- }
-
- _input_x = input_x;
- _input_y = input_y;
- _output_x = output_x;
- _output_y = output_y;
-
- // Set build options
- std::set<std::string> build_opts;
-
- if(_run_sobel_x)
- {
- build_opts.insert("-DGRAD_X");
- }
-
- if(_run_sobel_y)
- {
- build_opts.insert("-DGRAD_Y");
- }
-
- // Create kernel
- const std::string kernel_name = std::string("sobel_separable7x1");
- _kernel = create_kernel(compile_context, kernel_name, build_opts);
-
- const ICLTensor *input = _run_sobel_x ? _input_x : _input_y;
-
- // Configure kernel window
- constexpr unsigned int num_elems_processed_per_iteration = 8;
- constexpr unsigned int num_elems_written_per_iteration = 8;
- constexpr unsigned int num_elems_read_per_iteration = 8;
- constexpr unsigned int num_rows_read_per_iteration = 7;
-
- Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size());
-
- AccessWindowRectangle input_x_access(input_x == nullptr ? nullptr : input_x->info(), 0, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration);
- AccessWindowRectangle input_y_access(input_y == nullptr ? nullptr : input_y->info(), 0, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration);
- AccessWindowHorizontal output_x_access(output_x == nullptr ? nullptr : output_x->info(), 0, num_elems_written_per_iteration);
- AccessWindowHorizontal output_y_access(output_y == nullptr ? nullptr : output_y->info(), 0, num_elems_written_per_iteration);
-
- update_window_and_padding(win, input_x_access, input_y_access, output_x_access, output_y_access);
-
- output_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
- output_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
-
- ICLKernel::configure_internal(win);
-
- // Set config_id for enabling LWS tuning
- _config_id = kernel_name;
- _config_id += "_";
- _config_id += lower_string(string_from_data_type(input->info()->data_type()));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(1));
- _config_id += "_";
- _config_id += support::cpp11::to_string(border_undefined);
-}
-
-void CLSobel7x7VertKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- Window slice = window.first_slice_window_2D();
-
- do
- {
- unsigned int idx = 0;
-
- add_2D_tensor_argument_if((_run_sobel_x), idx, _input_x, slice);
- add_2D_tensor_argument_if((_run_sobel_x), idx, _output_x, slice);
- add_2D_tensor_argument_if((_run_sobel_y), idx, _input_y, slice);
- add_2D_tensor_argument_if((_run_sobel_y), idx, _output_y, slice);
-
- _kernel.setArg(idx++, 0 /*dummy*/);
-
- enqueue(queue, *this, slice, lws_hint());
- }
- while(window.slide_window_slice_2D(slice));
-}
diff --git a/src/core/CL/kernels/CLSobel7x7Kernel.h b/src/core/CL/kernels/CLSobel7x7Kernel.h
deleted file mode 100644
index c85f0aedf9..0000000000
--- a/src/core/CL/kernels/CLSobel7x7Kernel.h
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLSOBEL7X7KERNEL_H
-#define ARM_COMPUTE_CLSOBEL7X7KERNEL_H
-
-#include "src/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the kernel to run the horizontal pass of 7x7 Sobel filter on a tensor. */
-class CLSobel7x7HorKernel : public ICLKernel
-{
-public:
- /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */
- CLSobel7x7HorKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLSobel7x7HorKernel(const CLSobel7x7HorKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLSobel7x7HorKernel &operator=(const CLSobel7x7HorKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLSobel7x7HorKernel(CLSobel7x7HorKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLSobel7x7HorKernel &operator=(CLSobel7x7HorKernel &&) = default;
- /** Default destructor */
- ~CLSobel7x7HorKernel() = default;
-
- /** Initialise the kernel's source, destination and border.
- *
- * @note At least one of output_x or output_y must be set.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S32.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S32.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
- /** Initialise the kernel's source, destination and border.
- *
- * @note At least one of output_x or output_y must be set.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S32.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S32.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- const ICLTensor *_input; /**< Input tensor */
- ICLTensor *_output_x; /**< X output of horizontal pass */
- ICLTensor *_output_y; /**< Y output of horizontal pass */
- bool _run_sobel_x; /**< Do we need to run Sobel X ? */
- bool _run_sobel_y; /**< Do we need to run Sobel Y ? */
- BorderSize _border_size; /**< Border size */
-};
-
-/** Interface for the kernel to run the vertical pass of 7x7 Sobel filter on a tensor. */
-class CLSobel7x7VertKernel : public ICLKernel
-{
-public:
- /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */
- CLSobel7x7VertKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLSobel7x7VertKernel(const CLSobel7x7VertKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLSobel7x7VertKernel &operator=(const CLSobel7x7VertKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLSobel7x7VertKernel(CLSobel7x7VertKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLSobel7x7VertKernel &operator=(CLSobel7x7VertKernel &&) = default;
- /** Default destructor */
- ~CLSobel7x7VertKernel() = default;
-
- /** Initialise the kernel's source, destination and border.
- *
- * @note At least one of output_x or output_y must be set and the corresponding input.
- *
- * @param[in] input_x (Optional) Input for X (X output of horizontal pass). Data types supported: S32.
- * @param[in] input_y (Optional) Input for Y (Y output of horizontal pass). Data types supported: S32.
- * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S32.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S32.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
- /** Initialise the kernel's source, destination and border.
- *
- * @note At least one of output_x or output_y must be set and the corresponding input.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input_x (Optional) Input for X (X output of horizontal pass). Data types supported: S32.
- * @param[in] input_y (Optional) Input for Y (Y output of horizontal pass). Data types supported: S32.
- * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S32.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S32.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- const ICLTensor *_input_x; /**< X input (X output of the horizontal pass) */
- const ICLTensor *_input_y; /**< Y input (Y output of the horizontal pass) */
- ICLTensor *_output_x; /**< X output of sobel */
- ICLTensor *_output_y; /**< Y output of sobel */
- bool _run_sobel_x; /**< Do we need to run sobel X? */
- bool _run_sobel_y; /**< Do we need to run sobel Y? */
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLSOBEL7X7KERNEL_H */
diff --git a/src/core/CL/kernels/CLTableLookupKernel.cpp b/src/core/CL/kernels/CLTableLookupKernel.cpp
deleted file mode 100644
index b82f4c9889..0000000000
--- a/src/core/CL/kernels/CLTableLookupKernel.cpp
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/core/CL/kernels/CLTableLookupKernel.h"
-
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLLut.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
-
-#include <cstdint>
-#include <string>
-
-using namespace arm_compute;
-
-void CLTableLookupKernel::configure(const ICLTensor *input, const ICLLut *lut, ICLTensor *output)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, lut, output);
-}
-
-void CLTableLookupKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLLut *lut, ICLTensor *output)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16);
- ARM_COMPUTE_ERROR_ON(lut == nullptr);
- ARM_COMPUTE_ERROR_ON(DataType::U8 != lut->type() && DataType::S16 != lut->type());
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
-
- // Create kernel
- std::string kernel_name = (DataType::S16 == lut->type()) ? "tablelookup_S16" : "tablelookup_U8";
- _kernel = create_kernel(compile_context, kernel_name);
-
- // Set lut argument
- unsigned int idx = 2 * num_arguments_per_2D_tensor(); //Skip the input and output parameters
- _kernel.setArg(idx++, lut->cl_buffer());
- if(DataType::S16 == lut->type())
- {
- _kernel.setArg(idx++, lut->index_offset());
- _kernel.setArg(idx++, static_cast<uint32_t>(lut->num_elements()));
- }
-
- // Configure kernel
- constexpr unsigned int num_elems_processed_per_iteration = 8;
- ICLSimple2DKernel::configure(input, output, num_elems_processed_per_iteration);
-}
diff --git a/src/core/CL/kernels/CLTableLookupKernel.h b/src/core/CL/kernels/CLTableLookupKernel.h
deleted file mode 100644
index c8d15cbee2..0000000000
--- a/src/core/CL/kernels/CLTableLookupKernel.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLTABLELOOKUPKERNEL_H
-#define ARM_COMPUTE_CLTABLELOOKUPKERNEL_H
-
-#include "src/core/CL/ICLSimple2DKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-class ICLLut;
-
-/** Interface for the kernel to perform table lookup calculations. */
-class CLTableLookupKernel : public ICLSimple2DKernel
-{
-public:
- /** Initialise the kernel's input, lut and output.
- *
- * @param[in] input An input tensor. Data types supported: U8, S16.
- * @param[in] lut The input LUT. Data types supported: U8, S16.
- * @param[out] output The output tensor. Data types supported: U8, S16.
- */
- void configure(const ICLTensor *input, const ICLLut *lut, ICLTensor *output);
- /** Initialise the kernel's input, lut and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input An input tensor. Data types supported: U8, S16.
- * @param[in] lut The input LUT. Data types supported: U8, S16.
- * @param[out] output The output tensor. Data types supported: U8, S16.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLLut *lut, ICLTensor *output);
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLTABLELOOKUPKERNEL_H */
diff --git a/src/core/CL/kernels/CLThresholdKernel.cpp b/src/core/CL/kernels/CLThresholdKernel.cpp
deleted file mode 100644
index 72c22f043c..0000000000
--- a/src/core/CL/kernels/CLThresholdKernel.cpp
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/core/CL/kernels/CLThresholdKernel.h"
-
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
-
-#include <string>
-
-namespace arm_compute
-{
-void CLThresholdKernel::configure(const ICLTensor *input, ICLTensor *output, const ThresholdKernelInfo &info)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output, info);
-}
-
-void CLThresholdKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ThresholdKernelInfo &info)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
-
- // Construct kernel name
- std::string kernel_name = "threshold";
-
- switch(info.type)
- {
- case ThresholdType::BINARY:
- kernel_name += "_binary";
- break;
- case ThresholdType::RANGE:
- kernel_name += "_range";
- break;
- default:
- ARM_COMPUTE_ERROR("Thresholding type not recognized");
- break;
- }
-
- // Create kernel
- _kernel = create_kernel(compile_context, kernel_name);
-
- // Set arguments
- unsigned int idx = 2 * num_arguments_per_2D_tensor(); //Skip the input and output parameters
- _kernel.setArg(idx++, info.false_value);
- _kernel.setArg(idx++, info.true_value);
- _kernel.setArg(idx++, info.threshold);
-
- if(ThresholdType::RANGE == info.type)
- {
- _kernel.setArg(idx++, info.upper);
- }
-
- // Make sure _kernel is initialized before calling the parent's configure
- constexpr unsigned int num_elems_processed_per_iteration = 16;
- ICLSimple2DKernel::configure(input, output, num_elems_processed_per_iteration);
-}
-} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLThresholdKernel.h b/src/core/CL/kernels/CLThresholdKernel.h
deleted file mode 100644
index 511eaed1bf..0000000000
--- a/src/core/CL/kernels/CLThresholdKernel.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLTHRESHOLDKERNEL_H
-#define ARM_COMPUTE_CLTHRESHOLDKERNEL_H
-
-#include "arm_compute/core/KernelDescriptors.h"
-#include "arm_compute/core/Types.h"
-#include "src/core/CL/ICLSimple2DKernel.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ICLTensor;
-
-/** Interface for the thresholding kernel. */
-class CLThresholdKernel : public ICLSimple2DKernel
-{
-public:
- /**Initialise the kernel's input, output and threshold parameters.
- *
- * @param[in] input An input tensor. Data types supported: U8
- * @param[out] output The output tensor. Data types supported: U8.
- * @param[in] info Threshold descriptor
- */
- void configure(const ICLTensor *input, ICLTensor *output, const ThresholdKernelInfo &info);
- /**Initialise the kernel's input, output and threshold parameters.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input An input tensor. Data types supported: U8
- * @param[out] output The output tensor. Data types supported: U8.
- * @param[in] info Threshold descriptor
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ThresholdKernelInfo &info);
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NETHRESHOLDKERNEL_H */
diff --git a/src/core/CL/kernels/CLWarpAffineKernel.cpp b/src/core/CL/kernels/CLWarpAffineKernel.cpp
deleted file mode 100644
index 600c67a528..0000000000
--- a/src/core/CL/kernels/CLWarpAffineKernel.cpp
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/core/CL/kernels/CLWarpAffineKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Validate.h"
-#include "src/core/AccessWindowStatic.h"
-#include "src/core/helpers/WindowHelpers.h"
-#include "support/StringSupport.h"
-
-#include <cstddef>
-#include <set>
-#include <sstream>
-#include <string>
-
-namespace arm_compute
-{
-namespace
-{
-void options_add_matrix(std::set<std::string> &options, const std::array<float, 9> &matrix)
-{
- for(size_t i = 0; i < 6; ++i)
- {
- std::stringstream mat_str;
- mat_str << "-DMAT" << i << "=" << matrix[i] << " ";
- options.insert(mat_str.str());
- }
-}
-} // namespace
-
-BorderSize CLWarpAffineKernel::border_size() const
-{
- return BorderSize(1);
-}
-
-void CLWarpAffineKernel::configure(const ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output, matrix, policy);
-}
-
-void CLWarpAffineKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON(InterpolationPolicy::AREA == policy);
-
- _input = input;
- _output = output;
-
- // Create build options
- std::set<std::string> options;
- options_add_matrix(options, matrix);
- options.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
-
- // Create kernel
- std::string interpolation_name = string_from_interpolation_policy(policy);
- std::transform(interpolation_name.begin(), interpolation_name.end(), interpolation_name.begin(), ::tolower);
- const std::string kernel_name = "warp_affine_" + interpolation_name;
- _kernel = create_kernel(compile_context, kernel_name, options);
-
- // Set static kernel arguments
- unsigned int idx = 2 * num_arguments_per_2D_tensor(); //Skip the input and output parameters
- _kernel.setArg<cl_int>(idx++, input->info()->dimension(0));
- _kernel.setArg<cl_int>(idx++, input->info()->dimension(1));
-
- // Configure kernel window
- constexpr unsigned int num_elems_processed_per_iteration = 4;
-
- Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration));
-
- int total_right = ceil_to_multiple(input->info()->dimension(0), num_elems_processed_per_iteration);
- const int access_right = total_right + (((total_right - input->info()->dimension(0)) == 0) ? border_size().right : 0);
-
- AccessWindowStatic input_access(input->info(), -border_size().left, -border_size().top, access_right, input->info()->dimension(1) + border_size().bottom);
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
-
- update_window_and_padding(win, input_access, output_access);
-
- output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
-
- ICLKernel::configure_internal(win);
-
- // Set config_id for enabling LWS tuning
- _config_id = kernel_name;
- _config_id += "_";
- _config_id += lower_string(string_from_data_type(input->info()->data_type()));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(1));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(2));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(3));
- _config_id += "_";
- _config_id += support::cpp11::to_string(output->info()->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(output->info()->dimension(1));
- _config_id += "_";
- _config_id += support::cpp11::to_string(output->info()->dimension(2));
- _config_id += "_";
- _config_id += support::cpp11::to_string(output->info()->dimension(3));
- _config_id += "_";
- _config_id += lower_string(string_from_interpolation_policy(policy));
-}
-} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLWarpAffineKernel.h b/src/core/CL/kernels/CLWarpAffineKernel.h
deleted file mode 100644
index c600ee780d..0000000000
--- a/src/core/CL/kernels/CLWarpAffineKernel.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLWARPAFFINEKERNEL_H
-#define ARM_COMPUTE_CLWARPAFFINEKERNEL_H
-
-#include "arm_compute/core/Types.h"
-#include "src/core/CL/ICLSimple2DKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the warp affine kernel.*/
-class CLWarpAffineKernel : public ICLSimple2DKernel
-{
-public:
- /** Initialize the function's source, destination, interpolation policy and border_mode.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor, Data types supported: U8.
- * @param[in] matrix The perspective matrix. Must be 2x3 of type float
- * The matrix argument requires 9 values, the last 3 values are ignored.
- * @param[in] policy The interpolation type.
- */
- void configure(const ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy);
- /** Initialize the function's source, destination, interpolation policy and border_mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor, Data types supported: U8.
- * @param[in] matrix The perspective matrix. Must be 2x3 of type float
- * The matrix argument requires 9 values, the last 3 values are ignored.
- * @param[in] policy The interpolation type.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy);
-
- // Inherited methods overridden:
- BorderSize border_size() const override;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLWARPAFFINEKERNEL_H */
diff --git a/src/core/CL/kernels/CLWarpPerspectiveKernel.cpp b/src/core/CL/kernels/CLWarpPerspectiveKernel.cpp
deleted file mode 100644
index 5f20a0bdd3..0000000000
--- a/src/core/CL/kernels/CLWarpPerspectiveKernel.cpp
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/core/CL/kernels/CLWarpPerspectiveKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Validate.h"
-#include "src/core/AccessWindowStatic.h"
-#include "src/core/helpers/WindowHelpers.h"
-
-#include <cstddef>
-#include <set>
-#include <sstream>
-#include <string>
-
-using namespace arm_compute;
-
-namespace
-{
-inline void options_add_matrix(std::set<std::string> &options, const std::array<float, 9> &matrix)
-{
- for(size_t i = 0; i < 9; ++i)
- {
- std::stringstream mat_str;
- mat_str << "-DMAT" << i << "=" << matrix[i] << " ";
- options.insert(mat_str.str());
- }
-}
-} // namespace
-
-BorderSize CLWarpPerspectiveKernel::border_size() const
-{
- return BorderSize(1);
-}
-
-void CLWarpPerspectiveKernel::configure(const ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output, matrix, policy);
-}
-
-void CLWarpPerspectiveKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON(InterpolationPolicy::AREA == policy);
-
- _input = input;
- _output = output;
-
- // Create build options
- std::set<std::string> options;
- options_add_matrix(options, matrix);
- options.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
-
- // Create kernel
- std::string interpolation_name = string_from_interpolation_policy(policy);
- std::transform(interpolation_name.begin(), interpolation_name.end(), interpolation_name.begin(), ::tolower);
- std::string kernel_name = "warp_perspective_" + interpolation_name;
- _kernel = create_kernel(compile_context, kernel_name, options);
-
- // Set static kernel arguments
- unsigned int idx = 2 * num_arguments_per_2D_tensor(); //Skip the input and output parameters
- _kernel.setArg<cl_int>(idx++, input->info()->dimension(0));
- _kernel.setArg<cl_int>(idx++, input->info()->dimension(1));
-
- // Configure kernel window
- constexpr unsigned int num_elems_processed_per_iteration = 4;
-
- Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration));
-
- AccessWindowStatic input_access(input->info(), -border_size().left, -border_size().top, input->info()->dimension(0) + border_size().right, input->info()->dimension(1) + border_size().bottom);
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
-
- update_window_and_padding(win, input_access, output_access);
-
- output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
-
- ICLKernel::configure_internal(win);
-}
diff --git a/src/core/CL/kernels/CLWarpPerspectiveKernel.h b/src/core/CL/kernels/CLWarpPerspectiveKernel.h
deleted file mode 100644
index dcbe1c5560..0000000000
--- a/src/core/CL/kernels/CLWarpPerspectiveKernel.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLWARPERSPECTIVEKERNEL_H
-#define ARM_COMPUTE_CLWARPERSPECTIVEKERNEL_H
-
-#include "arm_compute/core/Types.h"
-#include "src/core/CL/ICLSimple2DKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-/** Interface for the warp perspective kernel.*/
-class CLWarpPerspectiveKernel : public ICLSimple2DKernel
-{
-public:
- /** Initialize the function's source, destination, interpolation policy and border_mode.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor, Data types supported: U8.
- * @param[in] matrix The perspective matrix. Must be 3x3 of type float.
- * @param[in] policy The interpolation type.
- */
- void configure(const ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy);
- /** Initialize the function's source, destination, interpolation policy and border_mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor, Data types supported: U8.
- * @param[in] matrix The perspective matrix. Must be 3x3 of type float.
- * @param[in] policy The interpolation type.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy);
-
- // Inherited methods overridden:
- BorderSize border_size() const override;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLWARPERSPECTIVEKERNEL_H */
diff --git a/src/core/NEON/NEKernels.h b/src/core/NEON/NEKernels.h
index aea245c6fb..b2c5592960 100644
--- a/src/core/NEON/NEKernels.h
+++ b/src/core/NEON/NEKernels.h
@@ -35,7 +35,6 @@
#include "src/core/NEON/kernels/NECol2ImKernel.h"
#include "src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h"
#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h"
-#include "src/core/NEON/kernels/NEConvolutionKernel.h"
#include "src/core/NEON/kernels/NECropKernel.h"
#include "src/core/NEON/kernels/NECumulativeDistributionKernel.h"
#include "src/core/NEON/kernels/NEDepthConvertLayerKernel.h"
@@ -72,7 +71,6 @@
#include "src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h"
#include "src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.h"
#include "src/core/NEON/kernels/NEMinMaxLayerKernel.h"
-#include "src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h"
#include "src/core/NEON/kernels/NENormalizationLayerKernel.h"
#include "src/core/NEON/kernels/NEPadLayerKernel.h"
#include "src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h"
@@ -83,6 +81,7 @@
#include "src/core/NEON/kernels/NEROIPoolingLayerKernel.h"
#include "src/core/NEON/kernels/NERangeKernel.h"
#include "src/core/NEON/kernels/NEReductionOperationKernel.h"
+#include "src/core/NEON/kernels/NERemapKernel.h"
#include "src/core/NEON/kernels/NEReorgLayerKernel.h"
#include "src/core/NEON/kernels/NEReverseKernel.h"
#include "src/core/NEON/kernels/NEScaleKernel.h"
diff --git a/src/core/NEON/kernels/NEConvolutionKernel.cpp b/src/core/NEON/kernels/NEConvolutionKernel.cpp
deleted file mode 100644
index 075de41203..0000000000
--- a/src/core/NEON/kernels/NEConvolutionKernel.cpp
+++ /dev/null
@@ -1,1625 +0,0 @@
-/*
- * Copyright (c) 2016-2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/core/NEON/kernels/NEConvolutionKernel.h"
-
-#include "arm_compute/core/Coordinates.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
-#include "src/core/helpers/AutoConfiguration.h"
-#include "src/core/helpers/WindowHelpers.h"
-
-#include <algorithm>
-#include <arm_neon.h>
-#include <array>
-#include <cstdint>
-#include <cstring>
-#include <tuple>
-
-namespace arm_compute
-{
-namespace
-{
-const uint16x8_t max_int16 = vdupq_n_u16(INT16_MAX);
-
-inline void store_results(const int32x4_t &out, const int32x4_t &out2, int16_t *output)
-{
- const int16x8_t s16results = vcombine_s16(vqmovn_s32(out),
- vqmovn_s32(out2));
- vst1q_s16(output, s16results);
-}
-
-inline void store_results(const int32x4_t &out, const int32x4_t &out2, uint8_t *output)
-{
- const uint8x8_t u8results = vqmovn_u16(vcombine_u16(vqmovun_s32(out),
- vqmovun_s32(out2)));
- vst1_u8(output, u8results);
-}
-
-inline void store_results(const uint32x4_t &out, const uint32x4_t &out2, int16_t *output)
-{
- const uint16x8_t u16results = vcombine_u16(vqmovn_u32(out), vqmovn_u32(out2));
- const int16x8_t s16results = vreinterpretq_s16_u16(vminq_u16(u16results, max_int16));
- vst1q_s16(output, s16results);
-}
-
-inline void store_results(const uint32x4_t &out, const uint32x4_t &out2, uint8_t *output)
-{
- const uint8x8_t u8results = vqmovn_u16(vcombine_u16(vqmovn_u32(out),
- vqmovn_u32(out2)));
- vst1_u8(output, u8results);
-}
-
-inline void store_results(const int16x8_t &out, const int16x8_t &out2, int16_t *output)
-{
- vst1q_s16(output, out);
- vst1q_s16(output + 8, out2);
-}
-
-inline void store_results(const int16x8_t &out, const int16x8_t &out2, uint8_t *output)
-{
- const uint8x16_t u8results = vcombine_u8(vqmovun_s16(out),
- vqmovun_s16(out2));
- vst1q_u8(output, u8results);
-}
-
-inline void store_results(const uint16x8_t &out, const uint16x8_t &out2, uint8_t *output)
-{
- const uint8x16_t u8results = vcombine_u8(vqmovn_u16(out),
- vqmovn_u16(out2));
- vst1q_u8(output, u8results);
-}
-
-inline void store_results(const uint16x8_t &out, const uint16x8_t &out2, int16_t *output)
-{
- vst1q_s16(output, vreinterpretq_s16_u16(vminq_u16(out, max_int16)));
- vst1q_s16(output + 8, vreinterpretq_s16_u16(vminq_u16(out2, max_int16)));
-}
-
-inline void convolve_row3x1_unrolled(int32x4_t &out, int32x4_t &out2, const uint8x16_t &row_data, const int16x4_t &mat0, const int16x4_t &mat1, const int16x4_t &mat2)
-{
- // Convert to s16 and split in blocks of 4 values:
- const int16x8_t s16_tmp0 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(row_data)));
- const int16x8_t s16_tmp1 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(row_data)));
-
- const int16x4x3_t row =
- {
- {
- vget_low_s16(s16_tmp0),
- vget_high_s16(s16_tmp0),
- vget_low_s16(s16_tmp1)
- }
- };
-
- // Calculate row left value for pixels [0,3]
- out = vmlal_s16(out, row.val[0], mat0);
- // Calculate row middle value for pixels [0,3]
- out = vmlal_s16(out, vext_s16(row.val[0], row.val[1], 1), mat1);
- // Calculate row right value for pixels [0,3]
- out = vmlal_s16(out, vext_s16(row.val[0], row.val[1], 2), mat2);
-
- // Calculate row left value for pixels [4,7]
- out2 = vmlal_s16(out2, row.val[1], mat0);
- // Calculate row middle value for pixels [4,7]
- out2 = vmlal_s16(out2, vext_s16(row.val[1], row.val[2], 1), mat1);
- // Calculate row right value for pixels [4,7]
- out2 = vmlal_s16(out2, vext_s16(row.val[1], row.val[2], 2), mat2);
-}
-
-inline void convolve_row3x1(int32x4_t &out, int32x4_t &out2, const uint8x16_t &row_data, const int16_t *convolution)
-{
- const int16x4_t mat0 = vld1_dup_s16(convolution);
- const int16x4_t mat1 = vld1_dup_s16(convolution + 1);
- const int16x4_t mat2 = vld1_dup_s16(convolution + 2);
-
- convolve_row3x1_unrolled(out, out2, row_data, mat0, mat1, mat2);
-}
-
-inline void convolve_row5x1(int32x4_t &out, int32x4_t &out2, const uint8x16_t &row_data, const int16_t *convolution)
-{
- const int16x4_t mat0 = vld1_dup_s16(convolution);
- const int16x4_t mat1 = vld1_dup_s16(convolution + 1);
- const int16x4_t mat2 = vld1_dup_s16(convolution + 2);
- const int16x4_t mat3 = vld1_dup_s16(convolution + 3);
- const int16x4_t mat4 = vld1_dup_s16(convolution + 4);
-
- // Convert to s16 and split in blocks of 4 values:
- const int16x8_t s16_tmp0 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(row_data)));
- const int16x8_t s16_tmp1 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(row_data)));
-
- const int16x4x3_t row =
- {
- {
- vget_low_s16(s16_tmp0),
- vget_high_s16(s16_tmp0),
- vget_low_s16(s16_tmp1)
- }
- };
-
- // Calculate row left 2 value for pixels [0,3]
- out = vmlal_s16(out, row.val[0], mat0);
- // Calculate row left 1 value for pixels [0,3]
- out = vmlal_s16(out, vext_s16(row.val[0], row.val[1], 1), mat1);
- // Calculate row middle value for pixels [0,3]
- out = vmlal_s16(out, vext_s16(row.val[0], row.val[1], 2), mat2);
- // Calculate row right +1 value for pixels [0,3]
- out = vmlal_s16(out, vext_s16(row.val[0], row.val[1], 3), mat3);
- // Calculate row right +2 value for pixels [0,3]
- out = vmlal_s16(out, row.val[1], mat4);
-
- // Calculate row left 2 value for pixels [4,7]
- out2 = vmlal_s16(out2, row.val[1], mat0);
- // Calculate row left 1 value for pixels [4,7]
- out2 = vmlal_s16(out2, vext_s16(row.val[1], row.val[2], 1), mat1);
- // Calculate row middle value for pixels [4,7]
- out2 = vmlal_s16(out2, vext_s16(row.val[1], row.val[2], 2), mat2);
- // Calculate row right +1 value for pixels [4,7]
- out2 = vmlal_s16(out2, vext_s16(row.val[1], row.val[2], 3), mat3);
- // Calculate row right +2 value for pixels [4,7]
- out2 = vmlal_s16(out2, row.val[2], mat4);
-}
-
-inline void convolve_row7x1(int32x4_t &out, int32x4_t &out2, const uint8x16_t &row_data, const int16_t *convolution)
-{
- const int16x4_t mat0 = vld1_dup_s16(convolution);
- const int16x4_t mat1 = vld1_dup_s16(convolution + 1);
- const int16x4_t mat2 = vld1_dup_s16(convolution + 2);
- const int16x4_t mat3 = vld1_dup_s16(convolution + 3);
- const int16x4_t mat4 = vld1_dup_s16(convolution + 4);
- const int16x4_t mat5 = vld1_dup_s16(convolution + 5);
- const int16x4_t mat6 = vld1_dup_s16(convolution + 6);
-
- // Convert to s16 and split in blocks of 4 values:
- const int16x8_t s16_tmp0 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(row_data)));
- const int16x8_t s16_tmp1 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(row_data)));
-
- const int16x4x4_t row =
- {
- {
- vget_low_s16(s16_tmp0),
- vget_high_s16(s16_tmp0),
- vget_low_s16(s16_tmp1),
- vget_high_s16(s16_tmp1)
- }
- };
-
- // Calculate row left 3 value for pixels [0,3]
- out = vmlal_s16(out, row.val[0], mat0);
- // Calculate row left 2 value for pixels [0,3]
- out = vmlal_s16(out, vext_s16(row.val[0], row.val[1], 1), mat1);
- // Calculate row left 1 value for pixels [0,3]
- out = vmlal_s16(out, vext_s16(row.val[0], row.val[1], 2), mat2);
- // Calculate row middle value for pixels [0,3]
- out = vmlal_s16(out, vext_s16(row.val[0], row.val[1], 3), mat3);
- // Calculate row right +1 value for pixels [0,3]
- out = vmlal_s16(out, row.val[1], mat4);
- // Calculate row right +2 value for pixels [0,3]
- out = vmlal_s16(out, vext_s16(row.val[1], row.val[2], 1), mat5);
- // Calculate row right +3 value for pixels [0,3]
- out = vmlal_s16(out, vext_s16(row.val[1], row.val[2], 2), mat6);
-
- // Calculate row left 3 value for pixels [4,7]
- out2 = vmlal_s16(out2, row.val[1], mat0);
- // Calculate row left 2 value for pixels [4,7]
- out2 = vmlal_s16(out2, vext_s16(row.val[1], row.val[2], 1), mat1);
- // Calculate row left 1 value for pixels [4,7]
- out2 = vmlal_s16(out2, vext_s16(row.val[1], row.val[2], 2), mat2);
- // Calculate row middle value for pixels [4,7]
- out2 = vmlal_s16(out2, vext_s16(row.val[1], row.val[2], 3), mat3);
- // Calculate row right +1 value for pixels [4,7]
- out2 = vmlal_s16(out2, row.val[2], mat4);
- // Calculate row right +2 value for pixels [4,7]
- out2 = vmlal_s16(out2, vext_s16(row.val[2], row.val[3], 1), mat5);
- // Calculate row right +3 value for pixels [4,7]
- out2 = vmlal_s16(out2, vext_s16(row.val[2], row.val[3], 2), mat6);
-}
-
-inline void convolve_row9x1(int32x4_t &out, int32x4_t &out2, const uint8x16_t &row_data, const int16_t *convolution)
-{
- const int16x4_t mat0 = vld1_dup_s16(convolution);
- const int16x4_t mat1 = vld1_dup_s16(convolution + 1);
- const int16x4_t mat2 = vld1_dup_s16(convolution + 2);
- const int16x4_t mat3 = vld1_dup_s16(convolution + 3);
- const int16x4_t mat4 = vld1_dup_s16(convolution + 4);
- const int16x4_t mat5 = vld1_dup_s16(convolution + 5);
- const int16x4_t mat6 = vld1_dup_s16(convolution + 6);
- const int16x4_t mat7 = vld1_dup_s16(convolution + 7);
- const int16x4_t mat8 = vld1_dup_s16(convolution + 8);
-
- // Convert to s16 and split in blocks of 4 values:
- const int16x8_t s16_tmp0 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(row_data)));
- const int16x8_t s16_tmp1 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(row_data)));
-
- const int16x4x4_t row =
- {
- {
- vget_low_s16(s16_tmp0),
- vget_high_s16(s16_tmp0),
- vget_low_s16(s16_tmp1),
- vget_high_s16(s16_tmp1)
- }
- };
-
- // Calculate row left 4 value for pixels [0,3]
- out = vmlal_s16(out, row.val[0], mat0);
- // Calculate row left 3 value for pixels [0,3]
- out = vmlal_s16(out, vext_s16(row.val[0], row.val[1], 1), mat1);
- // Calculate row left 2 value for pixels [0,3]
- out = vmlal_s16(out, vext_s16(row.val[0], row.val[1], 2), mat2);
- // Calculate row left 1 value for pixels [0,3]
- out = vmlal_s16(out, vext_s16(row.val[0], row.val[1], 3), mat3);
- // Calculate row middle value for pixels [0,3]
- out = vmlal_s16(out, row.val[1], mat4);
- // Calculate row right +1 value for pixels [0,3]
- out = vmlal_s16(out, vext_s16(row.val[1], row.val[2], 1), mat5);
- // Calculate row right +2 value for pixels [0,3]
- out = vmlal_s16(out, vext_s16(row.val[1], row.val[2], 2), mat6);
- // Calculate row right +3 value for pixels [0,3]
- out = vmlal_s16(out, vext_s16(row.val[1], row.val[2], 3), mat7);
- // Calculate row right +4 value for pixels [0,3]
- out = vmlal_s16(out, row.val[2], mat8);
-
- // Calculate row left 4 value for pixels [0,3]
- out2 = vmlal_s16(out2, row.val[1], mat0);
- // Calculate row left 3 value for pixels [0,3]
- out2 = vmlal_s16(out2, vext_s16(row.val[1], row.val[2], 1), mat1);
- // Calculate row left 2 value for pixels [0,3]
- out2 = vmlal_s16(out2, vext_s16(row.val[1], row.val[2], 2), mat2);
- // Calculate row left 1 value for pixels [0,3]
- out2 = vmlal_s16(out2, vext_s16(row.val[1], row.val[2], 3), mat3);
- // Calculate row middle value for pixels [0,3]
- out2 = vmlal_s16(out2, row.val[2], mat4);
- // Calculate row right +1 value for pixels [0,3]
- out2 = vmlal_s16(out2, vext_s16(row.val[2], row.val[3], 1), mat5);
- // Calculate row right +2 value for pixels [0,3]
- out2 = vmlal_s16(out2, vext_s16(row.val[2], row.val[3], 2), mat6);
- // Calculate row right +3 value for pixels [0,3]
- out2 = vmlal_s16(out2, vext_s16(row.val[2], row.val[3], 3), mat7);
- // Calculate row right +4 value for pixels [0,3]
- out2 = vmlal_s16(out2, row.val[3], mat8);
-}
-} // namespace
-
-/****************************************************************************************\
- * Square Convolution *
-\****************************************************************************************/
-
-template <unsigned int matrix_size>
-NEConvolutionKernel<matrix_size>::NEConvolutionKernel()
- : INESimpleKernel(), _scale(0), _convolution{ {} }
-{
-}
-
-template <unsigned int matrix_size>
-BorderSize NEConvolutionKernel<matrix_size>::border_size() const
-{
- return BorderSize{ matrix_size / 2 };
-}
-
-template <unsigned int matrix_size>
-void NEConvolutionKernel<matrix_size>::configure(const ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, bool border_undefined)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, conv);
-
- set_shape_if_empty(*output->info(), input->info()->tensor_shape());
-
- ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16);
-
- _input = input;
- _output = output;
-
- std::copy_n(conv, _convolution.size(), _convolution.begin());
-
- if(scale == 0)
- {
- _scale = calculate_matrix_scale(_convolution.data(), matrix_size);
- }
- else
- {
- _scale = scale;
- }
-
- // Configure kernel window
- constexpr unsigned int num_elems_processed_per_iteration = 8;
- constexpr unsigned int num_elems_read_per_iteration = 16;
- constexpr unsigned int num_elems_written_per_iteration = 8;
-
- Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size());
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration);
-
- update_window_and_padding(win,
- AccessWindowRectangle(input->info(), -border_size().left, -border_size().top, num_elems_read_per_iteration, matrix_size),
- output_access);
-
- output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
-
- INEKernel::configure(win);
-}
-
-template <>
-template <typename OutputType>
-void NEConvolutionKernel<3>::convolution(const Window &win)
-{
- static_assert(sizeof(OutputType) == sizeof(uint8_t) || sizeof(OutputType) == sizeof(int16_t), "The output buffer can only be u8 or s16");
- ARM_COMPUTE_ERROR_ON(_input->buffer() == nullptr);
-
- Iterator input(_input, win);
- Iterator output(_output, win);
-
- // Load the matrix's coefficients into Neon registers:
- const int16x4_t mat00 = vld1_dup_s16(_convolution.data());
- const int16x4_t mat01 = vld1_dup_s16(_convolution.data() + 1);
- const int16x4_t mat02 = vld1_dup_s16(_convolution.data() + 2);
- const int16x4_t mat10 = vld1_dup_s16(_convolution.data() + 3);
- const int16x4_t mat11 = vld1_dup_s16(_convolution.data() + 4);
- const int16x4_t mat12 = vld1_dup_s16(_convolution.data() + 5);
- const int16x4_t mat20 = vld1_dup_s16(_convolution.data() + 6);
- const int16x4_t mat21 = vld1_dup_s16(_convolution.data() + 7);
- const int16x4_t mat22 = vld1_dup_s16(_convolution.data() + 8);
- const float32x4_t scale_val = vdupq_n_f32(1.0f / _scale);
-
- const unsigned char *input_top_ptr = _input->buffer() + _input->info()->offset_element_in_bytes(Coordinates(-1, -1));
- const unsigned char *input_mid_ptr = _input->buffer() + _input->info()->offset_element_in_bytes(Coordinates(-1, 0));
- const unsigned char *input_low_ptr = _input->buffer() + _input->info()->offset_element_in_bytes(Coordinates(-1, 1));
-
- execute_window_loop(win, [&](const Coordinates &)
- {
- int32x4_t out = vdupq_n_s32(0);
- int32x4_t out2 = vdupq_n_s32(0);
-
- // Load 16 bytes from the top row:
- const uint8x16_t top_data = vld1q_u8(input_top_ptr + input.offset());
- convolve_row3x1_unrolled(out, out2, top_data, mat00, mat01, mat02);
-
- // Load 16 bytes from the middle row:
- const uint8x16_t mid_data = vld1q_u8(input_mid_ptr + input.offset());
- convolve_row3x1_unrolled(out, out2, mid_data, mat10, mat11, mat12);
-
- // Load 16 bytes from the middle row:
- const uint8x16_t low_data = vld1q_u8(input_low_ptr + input.offset());
- convolve_row3x1_unrolled(out, out2, low_data, mat20, mat21, mat22);
-
- // Apply scale
- if(_scale != 1)
- {
- // Convert to F32, scale and convert back to S32
- out = vcvtq_s32_f32(vmulq_f32(vcvtq_f32_s32(out), scale_val));
- out2 = vcvtq_s32_f32(vmulq_f32(vcvtq_f32_s32(out2), scale_val));
- }
-
- // Clamp and store as U8 or S16:
- store_results(out, out2, reinterpret_cast<OutputType *>(output.ptr()));
- },
- input, output);
-}
-
-template <>
-template <typename OutputType>
-void NEConvolutionKernel<5>::convolution(const Window &win)
-{
- static_assert(sizeof(OutputType) == sizeof(uint8_t) || sizeof(OutputType) == sizeof(int16_t), "The output buffer can only be u8 or s16");
- ARM_COMPUTE_ERROR_ON(_input->buffer() == nullptr);
-
- Iterator input(_input, win);
- Iterator output(_output, win);
-
- const float32x4_t scale_val = vdupq_n_f32(1.0f / _scale);
-
- const unsigned char *input_top2_ptr = _input->buffer() + _input->info()->offset_element_in_bytes(Coordinates(-2, -2));
- const unsigned char *input_top1_ptr = _input->buffer() + _input->info()->offset_element_in_bytes(Coordinates(-2, -1));
- const unsigned char *input_mid_ptr = _input->buffer() + _input->info()->offset_element_in_bytes(Coordinates(-2, 0));
- const unsigned char *input_low1_ptr = _input->buffer() + _input->info()->offset_element_in_bytes(Coordinates(-2, 1));
- const unsigned char *input_low2_ptr = _input->buffer() + _input->info()->offset_element_in_bytes(Coordinates(-2, 2));
-
- execute_window_loop(win, [&](const Coordinates &)
- {
- int32x4_t out = vdupq_n_s32(0);
- int32x4_t out2 = vdupq_n_s32(0);
-
- // Load 16 bytes from the top2 row:
- const uint8x16_t data_t2 = vld1q_u8(input_top2_ptr + input.offset());
- convolve_row5x1(out, out2, data_t2, _convolution.data());
-
- // Load 16 bytes from the top1 row:
- const uint8x16_t data_t1 = vld1q_u8(input_top1_ptr + input.offset());
- convolve_row5x1(out, out2, data_t1, _convolution.data() + 5);
-
- // Load 16 bytes from the middle row:
- const uint8x16_t data_m = vld1q_u8(input_mid_ptr + input.offset());
- convolve_row5x1(out, out2, data_m, _convolution.data() + 10);
-
- // Load 16 bytes from the low1 row:
- const uint8x16_t data_b1 = vld1q_u8(input_low1_ptr + input.offset());
- convolve_row5x1(out, out2, data_b1, _convolution.data() + 15);
-
- // Load 16 bytes from the low2 row:
- const uint8x16_t data_b2 = vld1q_u8(input_low2_ptr + input.offset());
- convolve_row5x1(out, out2, data_b2, _convolution.data() + 20);
-
- // Apply scale
- if(_scale != 1)
- {
- // Convert to F32, scale and convert back to S32
- out = vcvtq_s32_f32(vmulq_f32(vcvtq_f32_s32(out), scale_val));
- out2 = vcvtq_s32_f32(vmulq_f32(vcvtq_f32_s32(out2), scale_val));
- }
-
- // Clamp and store as U8 or S16:
- store_results(out, out2, reinterpret_cast<OutputType *>(output.ptr()));
- },
- input, output);
-}
-
-template <>
-template <typename OutputType>
-void NEConvolutionKernel<7>::convolution(const Window &win)
-{
- static_assert(sizeof(OutputType) == sizeof(uint8_t) || sizeof(OutputType) == sizeof(int16_t), "The output buffer can only be u8 or s16");
- ARM_COMPUTE_ERROR_ON(_input->buffer() == nullptr);
-
- Iterator input(_input, win);
- Iterator output(_output, win);
-
- const float32x4_t scale_val = vdupq_n_f32(1.0f / _scale);
-
- const unsigned char *input_top3_ptr = _input->buffer() + _input->info()->offset_element_in_bytes(Coordinates(-3, -3));
- const unsigned char *input_top2_ptr = _input->buffer() + _input->info()->offset_element_in_bytes(Coordinates(-3, -2));
- const unsigned char *input_top1_ptr = _input->buffer() + _input->info()->offset_element_in_bytes(Coordinates(-3, -1));
- const unsigned char *input_mid_ptr = _input->buffer() + _input->info()->offset_element_in_bytes(Coordinates(-3, 0));
- const unsigned char *input_low1_ptr = _input->buffer() + _input->info()->offset_element_in_bytes(Coordinates(-3, 1));
- const unsigned char *input_low2_ptr = _input->buffer() + _input->info()->offset_element_in_bytes(Coordinates(-3, 2));
- const unsigned char *input_low3_ptr = _input->buffer() + _input->info()->offset_element_in_bytes(Coordinates(-3, 3));
-
- execute_window_loop(win, [&](const Coordinates &)
- {
- int32x4_t out = vdupq_n_s32(0);
- int32x4_t out2 = vdupq_n_s32(0);
-
- // Load 16 bytes from the top3 row:
- const uint8x16_t data_t3 = vld1q_u8(input_top3_ptr + input.offset());
- convolve_row7x1(out, out2, data_t3, _convolution.data());
-
- // Load 16 bytes from the top2 row:
- const uint8x16_t data_t2 = vld1q_u8(input_top2_ptr + input.offset());
- convolve_row7x1(out, out2, data_t2, _convolution.data() + 7);
-
- // Load 16 bytes from the top1 row:
- const uint8x16_t data_t1 = vld1q_u8(input_top1_ptr + input.offset());
- convolve_row7x1(out, out2, data_t1, _convolution.data() + 14);
-
- // Load 16 bytes from the middle row:
- const uint8x16_t data_m = vld1q_u8(input_mid_ptr + input.offset());
- convolve_row7x1(out, out2, data_m, _convolution.data() + 21);
-
- // Load 16 bytes from the low1 row:
- const uint8x16_t data_b1 = vld1q_u8(input_low1_ptr + input.offset());
- convolve_row7x1(out, out2, data_b1, _convolution.data() + 28);
-
- // Load 16 bytes from the low2 row:
- const uint8x16_t data_b2 = vld1q_u8(input_low2_ptr + input.offset());
- convolve_row7x1(out, out2, data_b2, _convolution.data() + 35);
-
- // Load 16 bytes from the low3 row:
- const uint8x16_t data_b3 = vld1q_u8(input_low3_ptr + input.offset());
- convolve_row7x1(out, out2, data_b3, _convolution.data() + 42);
-
- // Apply scale
- if(_scale != 1)
- {
- // Convert to F32, scale and convert back to S32
- out = vcvtq_s32_f32(vmulq_f32(vcvtq_f32_s32(out), scale_val));
- out2 = vcvtq_s32_f32(vmulq_f32(vcvtq_f32_s32(out2), scale_val));
- }
-
- // Clamp and store as U8 or S16:
- store_results(out, out2, reinterpret_cast<OutputType *>(output.ptr()));
- },
- input, output);
-}
-
-template <>
-template <typename OutputType>
-void NEConvolutionKernel<9>::convolution(const Window &win)
-{
- static_assert(sizeof(OutputType) == sizeof(uint8_t) || sizeof(OutputType) == sizeof(int16_t), "The output buffer can only be u8 or s16");
- ARM_COMPUTE_ERROR_ON(_input->buffer() == nullptr);
-
- Iterator input(_input, win);
- Iterator output(_output, win);
-
- const float32x4_t scale_val = vdupq_n_f32(1.0f / _scale);
-
- const unsigned char *input_top4_ptr = _input->buffer() + _input->info()->offset_element_in_bytes(Coordinates(-4, -4));
- const unsigned char *input_top3_ptr = _input->buffer() + _input->info()->offset_element_in_bytes(Coordinates(-4, -3));
- const unsigned char *input_top2_ptr = _input->buffer() + _input->info()->offset_element_in_bytes(Coordinates(-4, -2));
- const unsigned char *input_top1_ptr = _input->buffer() + _input->info()->offset_element_in_bytes(Coordinates(-4, -1));
- const unsigned char *input_mid_ptr = _input->buffer() + _input->info()->offset_element_in_bytes(Coordinates(-4, 0));
- const unsigned char *input_low1_ptr = _input->buffer() + _input->info()->offset_element_in_bytes(Coordinates(-4, 1));
- const unsigned char *input_low2_ptr = _input->buffer() + _input->info()->offset_element_in_bytes(Coordinates(-4, 2));
- const unsigned char *input_low3_ptr = _input->buffer() + _input->info()->offset_element_in_bytes(Coordinates(-4, 3));
- const unsigned char *input_low4_ptr = _input->buffer() + _input->info()->offset_element_in_bytes(Coordinates(-4, 4));
-
- execute_window_loop(win, [&](const Coordinates &)
- {
- int32x4_t out = vdupq_n_s32(0);
- int32x4_t out2 = vdupq_n_s32(0);
-
- // Load 16 bytes from the top4 row:
- const uint8x16_t data_t4 = vld1q_u8(input_top4_ptr + input.offset());
- convolve_row9x1(out, out2, data_t4, _convolution.data());
-
- // Load 16 bytes from the top3 row:
- const uint8x16_t data_t3 = vld1q_u8(input_top3_ptr + input.offset());
- convolve_row9x1(out, out2, data_t3, _convolution.data() + 9);
-
- // Load 16 bytes from the top2 row:
- const uint8x16_t data_t2 = vld1q_u8(input_top2_ptr + input.offset());
- convolve_row9x1(out, out2, data_t2, _convolution.data() + 18);
-
- // Load 16 bytes from the top1 row:
- const uint8x16_t data_t1 = vld1q_u8(input_top1_ptr + input.offset());
- convolve_row9x1(out, out2, data_t1, _convolution.data() + 27);
-
- // Load 16 bytes from the middle row:
- const uint8x16_t data_m = vld1q_u8(input_mid_ptr + input.offset());
- convolve_row9x1(out, out2, data_m, _convolution.data() + 36);
-
- // Load 16 bytes from the low1 row:
- const uint8x16_t data_b1 = vld1q_u8(input_low1_ptr + input.offset());
- convolve_row9x1(out, out2, data_b1, _convolution.data() + 45);
-
- // Load 16 bytes from the low2 row:
- const uint8x16_t data_b2 = vld1q_u8(input_low2_ptr + input.offset());
- convolve_row9x1(out, out2, data_b2, _convolution.data() + 54);
-
- // Load 16 bytes from the low3 row:
- const uint8x16_t data_b3 = vld1q_u8(input_low3_ptr + input.offset());
- convolve_row9x1(out, out2, data_b3, _convolution.data() + 63);
-
- // Load 16 bytes from the low4 row:
- const uint8x16_t data_b4 = vld1q_u8(input_low4_ptr + input.offset());
- convolve_row9x1(out, out2, data_b4, _convolution.data() + 72);
-
- // Apply scale
- if(_scale != 1)
- {
- // Convert to F32, scale and convert back to S32
- out = vcvtq_s32_f32(vmulq_f32(vcvtq_f32_s32(out), scale_val));
- out2 = vcvtq_s32_f32(vmulq_f32(vcvtq_f32_s32(out2), scale_val));
- }
-
- // Clamp and store as U8 or S16:
- store_results(out, out2, reinterpret_cast<OutputType *>(output.ptr()));
- },
- input, output);
-}
-
-template <unsigned int matrix_size>
-void NEConvolutionKernel<matrix_size>::run(const Window &window, const ThreadInfo &info)
-{
- ARM_COMPUTE_UNUSED(info);
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
-
- switch(_output->info()->data_type())
- {
- case DataType::U8:
- convolution<uint8_t>(window);
- break;
- case DataType::S16:
- convolution<int16_t>(window);
- break;
- default:
- ARM_COMPUTE_ERROR("Not supported Data type!");
- break;
- }
-}
-
-template class arm_compute::NEConvolutionKernel<3>;
-template class arm_compute::NEConvolutionKernel<5>;
-template class arm_compute::NEConvolutionKernel<7>;
-template class arm_compute::NEConvolutionKernel<9>;
-
-/****************************************************************************************\
- * Separable Square Convolution *
-\****************************************************************************************/
-
-template <unsigned int matrix_size>
-NESeparableConvolutionHorKernel<matrix_size>::NESeparableConvolutionHorKernel()
- : _conv_row{ { 0 } }, _border_size(0)
-{
-}
-
-template <unsigned int matrix_size>
-BorderSize NESeparableConvolutionHorKernel<matrix_size>::border_size() const
-{
- return _border_size;
-}
-
-template <unsigned int matrix_size>
-void NESeparableConvolutionHorKernel<matrix_size>::configure(const ITensor *input, ITensor *output, const int16_t *conv_row, bool border_undefined)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, conv_row);
-
- set_shape_if_empty(*output->info(), input->info()->tensor_shape());
-
- ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U16, DataType::S16, DataType::S32);
-
- _input = input;
- _output = output;
- std::copy_n(conv_row, _conv_row.size(), _conv_row.begin());
- _border_size = BorderSize(border_undefined ? 0 : matrix_size / 2, matrix_size / 2);
-
- // Configure kernel window
- constexpr unsigned int num_elems_processed_per_iteration = 8;
- constexpr unsigned int num_elems_read_per_iteration = 16;
- constexpr unsigned int num_elems_written_per_iteration = 8;
-
- Window win = calculate_max_window_horizontal(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size());
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration);
-
- update_window_and_padding(win,
- AccessWindowHorizontal(input->info(), -border_size().left, num_elems_read_per_iteration),
- output_access);
-
- output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
-
- INEKernel::configure(win);
-}
-
-template <unsigned int matrix_size>
-void NESeparableConvolutionHorKernel<matrix_size>::run(const Window &window, const ThreadInfo &info)
-{
- ARM_COMPUTE_UNUSED(info);
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
- switch(_output->info()->data_type())
- {
- case DataType::U16:
- convolve<uint16_t>(window);
- break;
- case DataType::S16:
- convolve<int16_t>(window);
- break;
- case DataType::S32:
- convolve<int32_t>(window);
- break;
- default:
- ARM_COMPUTE_ERROR("Unsupported intermediate data type!");
- break;
- }
-}
-
-template <>
-template <>
-inline void NESeparableConvolutionHorKernel<5>::convolve<uint16_t>(const Window &window)
-{
- Window win_in(window);
- win_in.shift(Window::DimX, -2);
-
- Iterator input(_input, win_in);
- Iterator output(_output, window);
-
- execute_window_loop(window, [&](const Coordinates &)
- {
- const uint8x16_t data = vld1q_u8(input.ptr());
-
- const uint16x8x2_t data_u16 =
- {
- {
- vmovl_u8(vget_low_u8(data)),
- vmovl_u8(vget_high_u8(data))
- }
- };
-
- uint16x8_t out = vmulq_n_u16(data_u16.val[0], _conv_row[0]);
- out = vmlaq_n_u16(out, vextq_u16(data_u16.val[0], data_u16.val[1], 1), _conv_row[1]);
- out = vmlaq_n_u16(out, vextq_u16(data_u16.val[0], data_u16.val[1], 2), _conv_row[2]);
- out = vmlaq_n_u16(out, vextq_u16(data_u16.val[0], data_u16.val[1], 3), _conv_row[3]);
- out = vmlaq_n_u16(out, vextq_u16(data_u16.val[0], data_u16.val[1], 4), _conv_row[4]);
-
- vst1q_u16(reinterpret_cast<uint16_t *>(output.ptr()), out);
- },
- input, output);
-}
-
-template <>
-template <>
-inline void NESeparableConvolutionHorKernel<5>::convolve<int16_t>(const Window &window)
-{
- Window win_in(window);
- win_in.shift(Window::DimX, -2);
-
- Iterator input(_input, win_in);
- Iterator output(_output, window);
-
- execute_window_loop(window, [&](const Coordinates &)
- {
- const uint8x16_t data = vld1q_u8(input.ptr());
-
- const int16x8x2_t data_s16 =
- {
- {
- vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(data))),
- vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(data)))
- }
- };
-
- int16x8_t out = vmulq_n_s16(data_s16.val[0], _conv_row[0]);
- out = vmlaq_n_s16(out, vextq_s16(data_s16.val[0], data_s16.val[1], 1), _conv_row[1]);
- out = vmlaq_n_s16(out, vextq_s16(data_s16.val[0], data_s16.val[1], 2), _conv_row[2]);
- out = vmlaq_n_s16(out, vextq_s16(data_s16.val[0], data_s16.val[1], 3), _conv_row[3]);
- out = vmlaq_n_s16(out, vextq_s16(data_s16.val[0], data_s16.val[1], 4), _conv_row[4]);
-
- vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()), out);
- },
- input, output);
-}
-
-template <>
-template <>
-void NESeparableConvolutionHorKernel<5>::convolve<int32_t>(const Window &window)
-{
- Window win_in(window);
- win_in.shift(Window::DimX, -2);
-
- Iterator input(_input, win_in);
- Iterator output(_output, window);
-
- execute_window_loop(window, [&](const Coordinates &)
- {
- const uint8x16_t data = vld1q_u8(input.ptr());
-
- const int16x8x2_t data_s16 =
- {
- {
- vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(data))),
- vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(data)))
- }
- };
-
- const int16x8_t data_s16_l1 = vextq_s16(data_s16.val[0], data_s16.val[1], 1);
- const int16x8_t data_s16_m = vextq_s16(data_s16.val[0], data_s16.val[1], 2);
- const int16x8_t data_s16_r1 = vextq_s16(data_s16.val[0], data_s16.val[1], 3);
- const int16x8_t data_s16_r2 = vextq_s16(data_s16.val[0], data_s16.val[1], 4);
-
- int32x4_t out_low = vmull_n_s16(vget_low_s16(data_s16.val[0]), _conv_row[0]);
- out_low = vmlal_n_s16(out_low, vget_low_s16(data_s16_l1), _conv_row[1]);
- out_low = vmlal_n_s16(out_low, vget_low_s16(data_s16_m), _conv_row[2]);
- out_low = vmlal_n_s16(out_low, vget_low_s16(data_s16_r1), _conv_row[3]);
- out_low = vmlal_n_s16(out_low, vget_low_s16(data_s16_r2), _conv_row[4]);
-
- vst1q_s32(reinterpret_cast<int32_t *>(output.ptr()), out_low);
-
- int32x4_t out_high = vmull_n_s16(vget_high_s16(data_s16.val[0]), _conv_row[0]);
- out_high = vmlal_n_s16(out_high, vget_high_s16(data_s16_l1), _conv_row[1]);
- out_high = vmlal_n_s16(out_high, vget_high_s16(data_s16_m), _conv_row[2]);
- out_high = vmlal_n_s16(out_high, vget_high_s16(data_s16_r1), _conv_row[3]);
- out_high = vmlal_n_s16(out_high, vget_high_s16(data_s16_r2), _conv_row[4]);
-
- vst1q_s32(reinterpret_cast<int32_t *>(output.ptr()) + 4, out_high);
- },
- input, output);
-}
-
-template <>
-template <>
-inline void NESeparableConvolutionHorKernel<7>::convolve<uint16_t>(const Window &window)
-{
- Window win_in(window);
- win_in.shift(Window::DimX, -3);
-
- Iterator input(_input, win_in);
- Iterator output(_output, window);
-
- execute_window_loop(window, [&](const Coordinates &)
- {
- const uint8x16_t data = vld1q_u8(input.ptr());
-
- const uint16x8x2_t data_u16 =
- {
- {
- vmovl_u8(vget_low_u8(data)),
- vmovl_u8(vget_high_u8(data))
- }
- };
-
- uint16x8_t out = vmulq_n_u16(data_u16.val[0], _conv_row[0]);
- out = vmlaq_n_u16(out, vextq_u16(data_u16.val[0], data_u16.val[1], 1), _conv_row[1]);
- out = vmlaq_n_u16(out, vextq_u16(data_u16.val[0], data_u16.val[1], 2), _conv_row[2]);
- out = vmlaq_n_u16(out, vextq_u16(data_u16.val[0], data_u16.val[1], 3), _conv_row[3]);
- out = vmlaq_n_u16(out, vextq_u16(data_u16.val[0], data_u16.val[1], 4), _conv_row[4]);
- out = vmlaq_n_u16(out, vextq_u16(data_u16.val[0], data_u16.val[1], 5), _conv_row[5]);
- out = vmlaq_n_u16(out, vextq_u16(data_u16.val[0], data_u16.val[1], 6), _conv_row[6]);
-
- vst1q_u16(reinterpret_cast<uint16_t *>(output.ptr()), out);
- },
- input, output);
-}
-
-template <>
-template <>
-inline void NESeparableConvolutionHorKernel<7>::convolve<int16_t>(const Window &window)
-{
- Window win_in(window);
- win_in.shift(Window::DimX, -3);
-
- Iterator input(_input, win_in);
- Iterator output(_output, window);
-
- execute_window_loop(window, [&](const Coordinates &)
- {
- const uint8x16_t data = vld1q_u8(input.ptr());
-
- const int16x8x2_t data_s16 =
- {
- {
- vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(data))),
- vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(data)))
- }
- };
-
- int16x8_t out = vmulq_n_s16(data_s16.val[0], _conv_row[0]);
- out = vmlaq_n_s16(out, vextq_s16(data_s16.val[0], data_s16.val[1], 1), _conv_row[1]);
- out = vmlaq_n_s16(out, vextq_s16(data_s16.val[0], data_s16.val[1], 2), _conv_row[2]);
- out = vmlaq_n_s16(out, vextq_s16(data_s16.val[0], data_s16.val[1], 3), _conv_row[3]);
- out = vmlaq_n_s16(out, vextq_s16(data_s16.val[0], data_s16.val[1], 4), _conv_row[4]);
- out = vmlaq_n_s16(out, vextq_s16(data_s16.val[0], data_s16.val[1], 5), _conv_row[5]);
- out = vmlaq_n_s16(out, vextq_s16(data_s16.val[0], data_s16.val[1], 6), _conv_row[6]);
-
- vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()), out);
- },
- input, output);
-}
-
-template <>
-template <>
-void NESeparableConvolutionHorKernel<7>::convolve<int32_t>(const Window &window)
-{
- Window win_in(window);
- win_in.shift(Window::DimX, -3);
-
- Iterator input(_input, win_in);
- Iterator output(_output, window);
-
- execute_window_loop(window, [&](const Coordinates &)
- {
- const uint8x16_t data = vld1q_u8(input.ptr());
-
- const int16x8x2_t data_s16 =
- {
- {
- vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(data))),
- vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(data)))
- }
- };
-
- const int16x8_t data_s16_l2 = vextq_s16(data_s16.val[0], data_s16.val[1], 1);
- const int16x8_t data_s16_l1 = vextq_s16(data_s16.val[0], data_s16.val[1], 2);
- const int16x8_t data_s16_m = vextq_s16(data_s16.val[0], data_s16.val[1], 3);
- const int16x8_t data_s16_r1 = vextq_s16(data_s16.val[0], data_s16.val[1], 4);
- const int16x8_t data_s16_r2 = vextq_s16(data_s16.val[0], data_s16.val[1], 5);
- const int16x8_t data_s16_r3 = vextq_s16(data_s16.val[0], data_s16.val[1], 6);
-
- int32x4_t out_low = vmull_n_s16(vget_low_s16(data_s16.val[0]), _conv_row[0]);
- out_low = vmlal_n_s16(out_low, vget_low_s16(data_s16_l2), _conv_row[1]);
- out_low = vmlal_n_s16(out_low, vget_low_s16(data_s16_l1), _conv_row[2]);
- out_low = vmlal_n_s16(out_low, vget_low_s16(data_s16_m), _conv_row[3]);
- out_low = vmlal_n_s16(out_low, vget_low_s16(data_s16_r1), _conv_row[4]);
- out_low = vmlal_n_s16(out_low, vget_low_s16(data_s16_r2), _conv_row[5]);
- out_low = vmlal_n_s16(out_low, vget_low_s16(data_s16_r3), _conv_row[6]);
-
- vst1q_s32(reinterpret_cast<int32_t *>(output.ptr()), out_low);
-
- int32x4_t out_high = vmull_n_s16(vget_high_s16(data_s16.val[0]), _conv_row[0]);
- out_high = vmlal_n_s16(out_high, vget_high_s16(data_s16_l2), _conv_row[1]);
- out_high = vmlal_n_s16(out_high, vget_high_s16(data_s16_l1), _conv_row[2]);
- out_high = vmlal_n_s16(out_high, vget_high_s16(data_s16_m), _conv_row[3]);
- out_high = vmlal_n_s16(out_high, vget_high_s16(data_s16_r1), _conv_row[4]);
- out_high = vmlal_n_s16(out_high, vget_high_s16(data_s16_r2), _conv_row[5]);
- out_high = vmlal_n_s16(out_high, vget_high_s16(data_s16_r3), _conv_row[6]);
-
- vst1q_s32(reinterpret_cast<int32_t *>(output.ptr()) + 4, out_high);
- },
- input, output);
-}
-
-template <>
-template <>
-inline void NESeparableConvolutionHorKernel<9>::convolve<uint16_t>(const Window &window)
-{
- Window win_in(window);
- win_in.shift(Window::DimX, -4);
-
- Iterator input(_input, win_in);
- Iterator output(_output, window);
-
- execute_window_loop(window, [&](const Coordinates &)
- {
- const uint8x16_t data = vld1q_u8(input.ptr());
-
- const uint16x8x2_t data_u16 =
- {
- {
- vmovl_u8(vget_low_u8(data)),
- vmovl_u8(vget_high_u8(data))
- }
- };
-
- uint16x8_t out = vmulq_n_u16(data_u16.val[0], _conv_row[0]);
- out = vmlaq_n_u16(out, vextq_u16(data_u16.val[0], data_u16.val[1], 1), _conv_row[1]);
- out = vmlaq_n_u16(out, vextq_u16(data_u16.val[0], data_u16.val[1], 2), _conv_row[2]);
- out = vmlaq_n_u16(out, vextq_u16(data_u16.val[0], data_u16.val[1], 3), _conv_row[3]);
- out = vmlaq_n_u16(out, vextq_u16(data_u16.val[0], data_u16.val[1], 4), _conv_row[4]);
- out = vmlaq_n_u16(out, vextq_u16(data_u16.val[0], data_u16.val[1], 5), _conv_row[5]);
- out = vmlaq_n_u16(out, vextq_u16(data_u16.val[0], data_u16.val[1], 6), _conv_row[6]);
- out = vmlaq_n_u16(out, vextq_u16(data_u16.val[0], data_u16.val[1], 7), _conv_row[7]);
- out = vmlaq_n_u16(out, data_u16.val[1], _conv_row[8]);
-
- vst1q_u16(reinterpret_cast<uint16_t *>(output.ptr()), out);
- },
- input, output);
-}
-
-template <>
-template <>
-inline void NESeparableConvolutionHorKernel<9>::convolve<int16_t>(const Window &window)
-{
- Window win_in(window);
- win_in.shift(Window::DimX, -4);
-
- Iterator input(_input, win_in);
- Iterator output(_output, window);
-
- execute_window_loop(window, [&](const Coordinates &)
- {
- const uint8x16_t data = vld1q_u8(input.ptr());
-
- const int16x8x2_t data_s16 =
- {
- {
- vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(data))),
- vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(data)))
- }
- };
-
- int16x8_t out = vmulq_n_s16(data_s16.val[0], _conv_row[0]);
- out = vmlaq_n_s16(out, vextq_s16(data_s16.val[0], data_s16.val[1], 1), _conv_row[1]);
- out = vmlaq_n_s16(out, vextq_s16(data_s16.val[0], data_s16.val[1], 2), _conv_row[2]);
- out = vmlaq_n_s16(out, vextq_s16(data_s16.val[0], data_s16.val[1], 3), _conv_row[3]);
- out = vmlaq_n_s16(out, vextq_s16(data_s16.val[0], data_s16.val[1], 4), _conv_row[4]);
- out = vmlaq_n_s16(out, vextq_s16(data_s16.val[0], data_s16.val[1], 5), _conv_row[5]);
- out = vmlaq_n_s16(out, vextq_s16(data_s16.val[0], data_s16.val[1], 6), _conv_row[6]);
- out = vmlaq_n_s16(out, vextq_s16(data_s16.val[0], data_s16.val[1], 7), _conv_row[7]);
- out = vmlaq_n_s16(out, data_s16.val[1], _conv_row[8]);
-
- vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()), out);
- },
- input, output);
-}
-
-template <>
-template <>
-void NESeparableConvolutionHorKernel<9>::convolve<int32_t>(const Window &window)
-{
- Window win_in(window);
- win_in.shift(Window::DimX, -4);
-
- Iterator input(_input, win_in);
- Iterator output(_output, window);
-
- execute_window_loop(window, [&](const Coordinates &)
- {
- const uint8x16_t data = vld1q_u8(input.ptr());
-
- const int16x8x2_t data_s16 =
- {
- {
- vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(data))),
- vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(data)))
- }
- };
-
- const int16x8_t data_s16_l3 = vextq_s16(data_s16.val[0], data_s16.val[1], 1);
- const int16x8_t data_s16_l2 = vextq_s16(data_s16.val[0], data_s16.val[1], 2);
- const int16x8_t data_s16_l1 = vextq_s16(data_s16.val[0], data_s16.val[1], 3);
- const int16x8_t data_s16_m = vextq_s16(data_s16.val[0], data_s16.val[1], 4);
- const int16x8_t data_s16_r1 = vextq_s16(data_s16.val[0], data_s16.val[1], 5);
- const int16x8_t data_s16_r2 = vextq_s16(data_s16.val[0], data_s16.val[1], 6);
- const int16x8_t data_s16_r3 = vextq_s16(data_s16.val[0], data_s16.val[1], 7);
-
- int32x4_t out_low = vmull_n_s16(vget_low_s16(data_s16.val[0]), _conv_row[0]);
- out_low = vmlal_n_s16(out_low, vget_low_s16(data_s16_l3), _conv_row[1]);
- out_low = vmlal_n_s16(out_low, vget_low_s16(data_s16_l2), _conv_row[2]);
- out_low = vmlal_n_s16(out_low, vget_low_s16(data_s16_l1), _conv_row[3]);
- out_low = vmlal_n_s16(out_low, vget_low_s16(data_s16_m), _conv_row[4]);
- out_low = vmlal_n_s16(out_low, vget_low_s16(data_s16_r1), _conv_row[5]);
- out_low = vmlal_n_s16(out_low, vget_low_s16(data_s16_r2), _conv_row[6]);
- out_low = vmlal_n_s16(out_low, vget_low_s16(data_s16_r3), _conv_row[7]);
- out_low = vmlal_n_s16(out_low, vget_low_s16(data_s16.val[1]), _conv_row[8]);
-
- vst1q_s32(reinterpret_cast<int32_t *>(output.ptr()), out_low);
-
- int32x4_t out_high = vmull_n_s16(vget_high_s16(data_s16.val[0]), _conv_row[0]);
- out_high = vmlal_n_s16(out_high, vget_high_s16(data_s16_l3), _conv_row[1]);
- out_high = vmlal_n_s16(out_high, vget_high_s16(data_s16_l2), _conv_row[2]);
- out_high = vmlal_n_s16(out_high, vget_high_s16(data_s16_l1), _conv_row[3]);
- out_high = vmlal_n_s16(out_high, vget_high_s16(data_s16_m), _conv_row[4]);
- out_high = vmlal_n_s16(out_high, vget_high_s16(data_s16_r1), _conv_row[5]);
- out_high = vmlal_n_s16(out_high, vget_high_s16(data_s16_r2), _conv_row[6]);
- out_high = vmlal_n_s16(out_high, vget_high_s16(data_s16_r3), _conv_row[7]);
- out_high = vmlal_n_s16(out_high, vget_high_s16(data_s16.val[1]), _conv_row[8]);
-
- vst1q_s32(reinterpret_cast<int32_t *>(output.ptr()) + 4, out_high);
- },
- input, output);
-}
-
-template class arm_compute::NESeparableConvolutionHorKernel<5>;
-template class arm_compute::NESeparableConvolutionHorKernel<7>;
-template class arm_compute::NESeparableConvolutionHorKernel<9>;
-
-template <unsigned int matrix_size>
-NESeparableConvolutionVertKernel<matrix_size>::NESeparableConvolutionVertKernel()
- : _conv_col{ { 0 } }, _scale(0)
-{
-}
-
-template <unsigned int matrix_size>
-BorderSize NESeparableConvolutionVertKernel<matrix_size>::border_size() const
-{
- return BorderSize{ matrix_size / 2, 0 };
-}
-
-template <unsigned int matrix_size>
-void NESeparableConvolutionVertKernel<matrix_size>::configure(const ITensor *input, ITensor *output, const int16_t *conv_col, uint32_t scale, bool border_undefined)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, conv_col);
-
- set_shape_if_empty(*output->info(), input->info()->tensor_shape());
-
- ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U16, DataType::S16, DataType::S32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16);
- ARM_COMPUTE_ERROR_ON(scale == 0);
-
- _input = input;
- _output = output;
- std::copy_n(conv_col, _conv_col.size(), _conv_col.begin());
- _scale = scale;
-
- // Configure kernel window
- constexpr unsigned int num_elems_processed_per_iteration = 16;
- constexpr unsigned int num_elems_read_per_iteration = 16;
- constexpr unsigned int num_elems_written_per_iteration = 16;
-
- Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size());
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration);
-
- update_window_and_padding(win,
- AccessWindowRectangle(input->info(), 0, -border_size().top, num_elems_read_per_iteration, matrix_size),
- output_access);
-
- output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
-
- INEKernel::configure(win);
-}
-
-template <unsigned int matrix_size>
-void NESeparableConvolutionVertKernel<matrix_size>::run(const Window &window, const ThreadInfo &info)
-{
- ARM_COMPUTE_UNUSED(info);
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
-
- switch(_input->info()->data_type())
- {
- case DataType::U16:
- switch(_output->info()->data_type())
- {
- case DataType::U8:
- convolution_u16<uint8_t>(window);
- break;
- case DataType::S16:
- convolution_u16<int16_t>(window);
- break;
- default:
- ARM_COMPUTE_ERROR("Not supported");
- }
- break;
- case DataType::S16:
- switch(_output->info()->data_type())
- {
- case DataType::U8:
- convolution_s16<uint8_t>(window);
- break;
- case DataType::S16:
- convolution_s16<int16_t>(window);
- break;
- default:
- ARM_COMPUTE_ERROR("Not supported");
- }
- break;
- case DataType::S32:
- switch(_output->info()->data_type())
- {
- case DataType::U8:
- convolution_s32<uint8_t>(window);
- break;
- case DataType::S16:
- convolution_s32<int16_t>(window);
- break;
- default:
- ARM_COMPUTE_ERROR("Not supported");
- }
- break;
- default:
- ARM_COMPUTE_ERROR("Unsupported intermediate data type!");
- break;
- }
-}
-
-template <unsigned int matrix_size>
-template <typename OutputType>
-void NESeparableConvolutionVertKernel<matrix_size>::convolution_u16(const Window &win)
-{
- static_assert(sizeof(OutputType) == sizeof(uint8_t) || sizeof(OutputType) == sizeof(int16_t), "The output buffer can only be u8 or s16");
-
- Window win_in(win);
- win_in.set_dimension_step(Window::DimX, 8);
-
- Iterator in(_input, win_in);
- Iterator out(_output, win);
-
- std::array<unsigned char *, matrix_size> input_ptrs{ {} };
- const float32x4_t oneoverscale = vdupq_n_f32(1.0f / _scale);
- const int k_half = matrix_size / 2;
-
- // Set row pointers
- for(int i = -k_half; i <= k_half; ++i)
- {
- input_ptrs[k_half + i] = _input->ptr_to_element(Coordinates(0, i));
- }
-
- execute_window_loop(win, [&](const Coordinates &)
- {
- uint16x8_t out0 = vdupq_n_u16(0);
- uint16x8_t out1 = vdupq_n_u16(0);
-
- // First half
- for(unsigned int r = 0; r < matrix_size; ++r)
- {
- const uint16x8_t data = vld1q_u16(reinterpret_cast<const uint16_t *>(input_ptrs[r] + in.offset()));
- out0 = vmlaq_n_u16(out0, data, _conv_col[r]);
- }
-
- in.increment(Window::DimX);
-
- // Second half
- for(unsigned int r = 0; r < matrix_size; ++r)
- {
- const uint16x8_t data = vld1q_u16(reinterpret_cast<const uint16_t *>(input_ptrs[r] + in.offset()));
- out1 = vmlaq_n_u16(out1, data, _conv_col[r]);
- }
-
- //scale the result if needed
- if(_scale != 1)
- {
- float32x4_t out0_f32_high = vcvtq_f32_u32(vmovl_u16(vget_high_u16(out0)));
- float32x4_t out0_f32_low = vcvtq_f32_u32(vmovl_u16(vget_low_u16(out0)));
- out0_f32_high = vmulq_f32(out0_f32_high, oneoverscale);
- out0_f32_low = vmulq_f32(out0_f32_low, oneoverscale);
- store_results(vcvtq_u32_f32(out0_f32_low), vcvtq_u32_f32(out0_f32_high), reinterpret_cast<OutputType *>(out.ptr()));
-
- float32x4_t out1_f32_high = vcvtq_f32_u32(vmovl_u16(vget_high_u16(out1)));
- float32x4_t out1_f32_low = vcvtq_f32_u32(vmovl_u16(vget_low_u16(out1)));
- out1_f32_high = vmulq_f32(out1_f32_high, oneoverscale);
- out1_f32_low = vmulq_f32(out1_f32_low, oneoverscale);
- store_results(vcvtq_u32_f32(out1_f32_low), vcvtq_u32_f32(out1_f32_high), reinterpret_cast<OutputType *>(out.ptr()) + 8);
- }
- else
- {
- store_results(out0, out1, reinterpret_cast<OutputType *>(out.ptr()));
- }
- },
- in, out);
-}
-
-template <unsigned int matrix_size>
-template <typename OutputType>
-void NESeparableConvolutionVertKernel<matrix_size>::convolution_s16(const Window &win)
-{
- static_assert(sizeof(OutputType) == sizeof(uint8_t) || sizeof(OutputType) == sizeof(int16_t), "The output buffer can only be u8 or s16");
-
- Window win_in(win);
- win_in.set_dimension_step(Window::DimX, 8);
-
- Iterator in(_input, win_in);
- Iterator out(_output, win);
-
- std::array<unsigned char *, matrix_size> input_ptrs{ {} };
- const float32x4_t oneoverscale = vdupq_n_f32(1.0f / _scale);
- const int k_half = matrix_size / 2;
-
- // Set row pointers
- for(int i = -k_half; i <= k_half; ++i)
- {
- input_ptrs[k_half + i] = _input->ptr_to_element(Coordinates(0, i));
- }
-
- execute_window_loop(win, [&](const Coordinates &)
- {
- int16x8_t out0 = vdupq_n_s16(0);
- int16x8_t out1 = vdupq_n_s16(0);
-
- // First half
- for(unsigned int r = 0; r < matrix_size; ++r)
- {
- const int16x8_t data = vld1q_s16(reinterpret_cast<const int16_t *>(input_ptrs[r] + in.offset()));
- out0 = vmlaq_n_s16(out0, data, _conv_col[r]);
- }
-
- in.increment(Window::DimX);
-
- // Second half
- for(unsigned int r = 0; r < matrix_size; ++r)
- {
- const int16x8_t data = vld1q_s16(reinterpret_cast<const int16_t *>(input_ptrs[r] + in.offset()));
- out1 = vmlaq_n_s16(out1, data, _conv_col[r]);
- }
-
- //scale the result if needed
- if(_scale != 1)
- {
- float32x4_t out0_f32_high = vcvtq_f32_s32(vmovl_s16(vget_high_s16(out0)));
- float32x4_t out0_f32_low = vcvtq_f32_s32(vmovl_s16(vget_low_s16(out0)));
- out0_f32_high = vmulq_f32(out0_f32_high, oneoverscale);
- out0_f32_low = vmulq_f32(out0_f32_low, oneoverscale);
- store_results(vcvtq_s32_f32(out0_f32_low), vcvtq_s32_f32(out0_f32_high), reinterpret_cast<OutputType *>(out.ptr()));
-
- float32x4_t out1_f32_high = vcvtq_f32_s32(vmovl_s16(vget_high_s16(out1)));
- float32x4_t out1_f32_low = vcvtq_f32_s32(vmovl_s16(vget_low_s16(out1)));
- out1_f32_high = vmulq_f32(out1_f32_high, oneoverscale);
- out1_f32_low = vmulq_f32(out1_f32_low, oneoverscale);
- store_results(vcvtq_s32_f32(out1_f32_low), vcvtq_s32_f32(out1_f32_high), reinterpret_cast<OutputType *>(out.ptr()) + 8);
- }
- else
- {
- store_results(out0, out1, reinterpret_cast<OutputType *>(out.ptr()));
- }
- },
- in, out);
-}
-
-template <unsigned int matrix_size>
-template <typename OutputType>
-void NESeparableConvolutionVertKernel<matrix_size>::convolution_s32(const Window &win)
-{
- static_assert(sizeof(OutputType) == sizeof(uint8_t) || sizeof(OutputType) == sizeof(int16_t), "The output buffer can only be u8 or s16");
-
- Window win_in(win);
- win_in.set_dimension_step(Window::DimX, 8);
-
- Iterator in(_input, win_in);
- Iterator out(_output, win);
-
- std::array<unsigned char *, matrix_size> input_ptrs{ {} };
- const float32x4_t oneoverscale = vdupq_n_f32(1.0f / _scale);
- const int k_half = matrix_size / 2;
-
- // Set row pointers
- for(int i = -k_half; i <= k_half; ++i)
- {
- input_ptrs[k_half + i] = _input->ptr_to_element(Coordinates(0, i));
- }
-
- const int32x4_t zero = vdupq_n_s32(0);
-
- execute_window_loop(win, [&](const Coordinates &)
- {
- int32x4x2_t out0 =
- {
- {
- zero,
- zero
- }
- };
-
- int32x4x2_t out1 =
- {
- {
- zero,
- zero
- }
- };
-
- // First half
- for(unsigned int r = 0; r < matrix_size; ++r)
- {
- const int32x4x2_t data = vld2q_s32(reinterpret_cast<const int32_t *>(input_ptrs[r] + in.offset()));
- out0.val[0] = vmlaq_n_s32(out0.val[0], data.val[0], _conv_col[r]);
- out0.val[1] = vmlaq_n_s32(out0.val[1], data.val[1], _conv_col[r]);
- }
-
- in.increment(Window::DimX);
-
- // Second half
- for(unsigned int r = 0; r < matrix_size; ++r)
- {
- const int32x4x2_t data = vld2q_s32(reinterpret_cast<const int32_t *>(input_ptrs[r] + in.offset()));
- out1.val[0] = vmlaq_n_s32(out1.val[0], data.val[0], _conv_col[r]);
- out1.val[1] = vmlaq_n_s32(out1.val[1], data.val[1], _conv_col[r]);
- }
-
- //scale the result if needed
- if(_scale != 1)
- {
- float32x4_t out0_f32_odd = vcvtq_f32_s32(out0.val[0]);
- float32x4_t out0_f32_even = vcvtq_f32_s32(out0.val[1]);
- out0_f32_odd = vmulq_f32(out0_f32_odd, oneoverscale);
- out0_f32_even = vmulq_f32(out0_f32_even, oneoverscale);
- out0.val[0] = vcvtq_s32_f32(out0_f32_odd);
- out0.val[1] = vcvtq_s32_f32(out0_f32_even);
-
- float32x4_t out1_f32_odd = vcvtq_f32_s32(out1.val[0]);
- float32x4_t out1_f32_even = vcvtq_f32_s32(out1.val[1]);
- out1_f32_odd = vmulq_f32(out1_f32_odd, oneoverscale);
- out1_f32_even = vmulq_f32(out1_f32_even, oneoverscale);
- out1.val[0] = vcvtq_s32_f32(out1_f32_odd);
- out1.val[1] = vcvtq_s32_f32(out1_f32_even);
- }
-
- const int32x4x2_t out0_s32 = vzipq_s32(out0.val[0], out0.val[1]);
- store_results(out0_s32.val[0], out0_s32.val[1], reinterpret_cast<OutputType *>(out.ptr()));
-
- const int32x4x2_t out1_s32 = vzipq_s32(out1.val[0], out1.val[1]);
- store_results(out1_s32.val[0], out1_s32.val[1], reinterpret_cast<OutputType *>(out.ptr()) + 8);
- },
- in, out);
-}
-
-template class arm_compute::NESeparableConvolutionVertKernel<5>;
-template class arm_compute::NESeparableConvolutionVertKernel<7>;
-template class arm_compute::NESeparableConvolutionVertKernel<9>;
-
-/****************************************************************************************\
- * Rectangle Convolution *
-\****************************************************************************************/
-
-NEConvolutionRectangleKernel::NEConvolutionRectangleKernel()
- : _input(nullptr), _output(nullptr), _scale(0), _convolution(), _border_size(), _func_idx(0)
-{
-}
-
-BorderSize NEConvolutionRectangleKernel::border_size() const
-{
- return _border_size;
-}
-
-void NEConvolutionRectangleKernel::configure(const ITensor *input, ITensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, conv);
-
- set_shape_if_empty(*output->info(), input->info()->tensor_shape());
-
- ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16);
- ARM_COMPUTE_ERROR_ON(width != 3 && width != 5 && width != 7 && width != 9);
- ARM_COMPUTE_ERROR_ON(height != 3 && height != 5 && height != 7 && height != 9);
- ARM_COMPUTE_ERROR_ON(0 == scale);
-
- _input = input;
- _output = output;
- _scale = scale;
- _border_size = BorderSize(height / 2, width / 2);
-
- // Setup the convolution matrix
- const uint32_t nr_elements = width * height;
- _convolution.resize(nr_elements);
- std::copy_n(conv, nr_elements, _convolution.begin());
-
- // Set function index to help choose appropriate function in run()
- _func_idx = get_index(height) * 4 + get_index(width);
- ARM_COMPUTE_ERROR_ON(_func_idx > (_nr_supported_sizes * _nr_supported_sizes));
-
- // Configure kernel window
- constexpr unsigned int num_elems_processed_per_iteration = 8;
- constexpr unsigned int num_elems_read_per_iteration = 16;
- constexpr unsigned int num_elems_written_per_iteration = 8;
-
- Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, _border_size);
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration);
-
- update_window_and_padding(win,
- AccessWindowRectangle(input->info(), -_border_size.left, -_border_size.top, num_elems_read_per_iteration, height),
- output_access);
-
- output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, _border_size);
-
- INEKernel::configure(win);
-}
-
-void NEConvolutionRectangleKernel::run(const Window &window, const ThreadInfo &info)
-{
- ARM_COMPUTE_UNUSED(info);
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
-
- using ConvolutionRectangleFunction = void (NEConvolutionRectangleKernel::*)(const Window & window);
-
- // uint8_t function table
- static const std::array<ConvolutionRectangleFunction, 16> func_table_u8 =
- {
- {
- &NEConvolutionRectangleKernel::convolution<uint8_t, 3, 3>,
- &NEConvolutionRectangleKernel::convolution<uint8_t, 3, 5>,
- &NEConvolutionRectangleKernel::convolution<uint8_t, 3, 7>,
- &NEConvolutionRectangleKernel::convolution<uint8_t, 3, 9>,
- &NEConvolutionRectangleKernel::convolution<uint8_t, 5, 3>,
- &NEConvolutionRectangleKernel::convolution<uint8_t, 5, 5>,
- &NEConvolutionRectangleKernel::convolution<uint8_t, 5, 7>,
- &NEConvolutionRectangleKernel::convolution<uint8_t, 5, 9>,
- &NEConvolutionRectangleKernel::convolution<uint8_t, 7, 3>,
- &NEConvolutionRectangleKernel::convolution<uint8_t, 7, 5>,
- &NEConvolutionRectangleKernel::convolution<uint8_t, 7, 7>,
- &NEConvolutionRectangleKernel::convolution<uint8_t, 7, 9>,
- &NEConvolutionRectangleKernel::convolution<uint8_t, 9, 3>,
- &NEConvolutionRectangleKernel::convolution<uint8_t, 9, 5>,
- &NEConvolutionRectangleKernel::convolution<uint8_t, 9, 7>,
- &NEConvolutionRectangleKernel::convolution<uint8_t, 9, 9>
- }
- };
- // int16_t function table
- static const std::array<ConvolutionRectangleFunction, 16> func_table_s16 =
- {
- {
- &NEConvolutionRectangleKernel::convolution<int16_t, 3, 3>,
- &NEConvolutionRectangleKernel::convolution<int16_t, 3, 5>,
- &NEConvolutionRectangleKernel::convolution<int16_t, 3, 7>,
- &NEConvolutionRectangleKernel::convolution<int16_t, 3, 9>,
- &NEConvolutionRectangleKernel::convolution<int16_t, 5, 3>,
- &NEConvolutionRectangleKernel::convolution<int16_t, 5, 5>,
- &NEConvolutionRectangleKernel::convolution<int16_t, 5, 7>,
- &NEConvolutionRectangleKernel::convolution<int16_t, 5, 9>,
- &NEConvolutionRectangleKernel::convolution<int16_t, 7, 3>,
- &NEConvolutionRectangleKernel::convolution<int16_t, 7, 5>,
- &NEConvolutionRectangleKernel::convolution<int16_t, 7, 7>,
- &NEConvolutionRectangleKernel::convolution<int16_t, 7, 9>,
- &NEConvolutionRectangleKernel::convolution<int16_t, 9, 3>,
- &NEConvolutionRectangleKernel::convolution<int16_t, 9, 5>,
- &NEConvolutionRectangleKernel::convolution<int16_t, 9, 7>,
- &NEConvolutionRectangleKernel::convolution<int16_t, 9, 9>
- }
- };
-
- // Run appropriate function
- switch(_output->info()->data_type())
- {
- case DataType::U8:
- ARM_COMPUTE_ERROR_ON(_func_idx >= func_table_u8.size());
- (this->*func_table_u8[_func_idx])(window);
- break;
- case DataType::S16:
- ARM_COMPUTE_ERROR_ON(_func_idx >= func_table_s16.size());
- (this->*func_table_s16[_func_idx])(window);
- break;
- default:
- ARM_COMPUTE_ERROR("Not supported");
- }
-}
-
-unsigned int NEConvolutionRectangleKernel::get_index(uint32_t val)
-{
- switch(val)
- {
- case 3:
- return 0;
- case 5:
- return 1;
- case 7:
- return 2;
- case 9:
- return 3;
- default:
- ARM_COMPUTE_ERROR("Not supported dimension size");
- return 0;
- }
-}
-
-template <typename OutputType, unsigned int rows, unsigned int cols>
-void NEConvolutionRectangleKernel::convolution(const Window &win)
-{
- static_assert(sizeof(OutputType) == sizeof(uint8_t) || sizeof(OutputType) == sizeof(int16_t), "The output buffer can only be u8 or s16");
- ARM_COMPUTE_ERROR_ON(_input->buffer() == nullptr);
-
- Iterator input(_input, win);
- Iterator output(_output, win);
-
- std::array<unsigned char *, rows> input_ptrs{ {} };
- const int16_t *conv = _convolution.data();
- const float32x4_t scale_val = vdupq_n_f32(1.0f / _scale);
- const int k_row_half = rows / 2;
- const int k_col_half = cols / 2;
-
- // Set row pointers
- for(int i = -k_row_half; i <= k_row_half; ++i)
- {
- input_ptrs[k_row_half + i] = _input->buffer() + _input->info()->offset_element_in_bytes(Coordinates(-k_col_half, i));
- }
-
- execute_window_loop(win, [&](const Coordinates &)
- {
- int32x4_t out = vdupq_n_s32(0);
- int32x4_t out2 = vdupq_n_s32(0);
-
- // Perform appropriate convolution
- for(unsigned int r = 0; r < rows; ++r)
- {
- const uint8x16_t data = vld1q_u8(input_ptrs[r] + input.offset());
- if(3 == cols)
- {
- convolve_row3x1(out, out2, data, conv + r * cols);
- }
- else if(5 == cols)
- {
- convolve_row5x1(out, out2, data, conv + r * cols);
- }
- else if(7 == cols)
- {
- convolve_row7x1(out, out2, data, conv + r * cols);
- }
- else if(9 == cols)
- {
- convolve_row9x1(out, out2, data, conv + r * cols);
- }
- else
- {
- ARM_COMPUTE_ERROR("Unsupported number of columns");
- }
- }
-
- // Apply scale
- if(_scale != 1)
- {
- // Convert to F32, scale and convert back to S32
- out = vcvtq_s32_f32(vmulq_f32(vcvtq_f32_s32(out), scale_val));
- out2 = vcvtq_s32_f32(vmulq_f32(vcvtq_f32_s32(out2), scale_val));
- }
-
- // Clamp and store as U8 or S16:
- store_results(out, out2, reinterpret_cast<OutputType *>(output.ptr()));
- },
- input, output);
-}
-} // namespace arm_compute
diff --git a/src/core/NEON/kernels/NEConvolutionKernel.h b/src/core/NEON/kernels/NEConvolutionKernel.h
deleted file mode 100644
index b8bf1d169e..0000000000
--- a/src/core/NEON/kernels/NEConvolutionKernel.h
+++ /dev/null
@@ -1,299 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NECONVOLUTIONKERNEL_H
-#define ARM_COMPUTE_NECONVOLUTIONKERNEL_H
-
-#include "src/core/NEON/INEKernel.h"
-#include "src/core/NEON/INESimpleKernel.h"
-
-#include <array>
-#include <cstdint>
-#include <vector>
-
-namespace arm_compute
-{
-class ITensor;
-
-/****************************************************************************************\
- * Square Convolution *
-\****************************************************************************************/
-
-/** Interface for the kernel to run an arbitrary size convolution on a tensor. (Currently supports 3x3, 5x5, 7x7 and 9x9).
- * The client can supply a convolution matrix \f$ C_{m,n} \f$.
- * @f{eqnarray}{
- * k_0 &=& \frac{m}{2} \\
- * l_0 &=& \frac{n}{2} \\
- * sum &=& \sum_{k=0,l=0}^{k=m-1,l=n-1} input(x+k-k_0, y+l-l_0) C_{k,l}
- * @f}
- *
- * @note The above equation for this function is similar to the default OpenCV Filter2D function,
- * which actually computes a correlation and not a convolution.
- * In case of a real convolution the convolution matrix should be flipped both horizontally and vertically.
- */
-template <unsigned int matrix_size>
-class NEConvolutionKernel : public INESimpleKernel
-{
-public:
- const char *name() const override
- {
- return "NEConvolutionKernel";
- }
- /** Default constructor */
- NEConvolutionKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- NEConvolutionKernel(const NEConvolutionKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- NEConvolutionKernel &operator=(const NEConvolutionKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEConvolutionKernel(NEConvolutionKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEConvolutionKernel &operator=(NEConvolutionKernel &&) = default;
- /** Default destructor */
- ~NEConvolutionKernel() = default;
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] input Source tensor. Data type supported: U8.
- * @param[out] output Destination tensor. Data types supported: U8, S16.
- * @param[in] conv Convolution matrix to apply to the input tensor.
- * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- template <typename OutputType>
- void convolution(const Window &win);
-
-protected:
- uint32_t _scale; /**< scale of the convolution */
- std::array<int16_t, matrix_size *matrix_size> _convolution; /**< convolution matrix */
-};
-
-/** Interface for the kernel which applied a 3x3 convolution to a tensor.*/
-using NEConvolution3x3Kernel = NEConvolutionKernel<3>;
-/** Interface for the kernel which applied a 5x5 convolution to a tensor.*/
-using NEConvolution5x5Kernel = NEConvolutionKernel<5>;
-/** Interface for the kernel which applied a 7x7 convolution to a tensor.*/
-using NEConvolution7x7Kernel = NEConvolutionKernel<7>;
-///** Interface for the kernel which applied a 9x9 convolution to a tensor.*/
-using NEConvolution9x9Kernel = NEConvolutionKernel<9>;
-
-/****************************************************************************************\
- * Separable Square Convolution *
-\****************************************************************************************/
-
-/** Kernel for the Horizontal pass of a Separable Convolution */
-template <unsigned int matrix_size>
-class NESeparableConvolutionHorKernel : public INESimpleKernel
-{
-public:
- const char *name() const override
- {
- return "NESeparableConvolutionHorKernel";
- }
- /** Default constructor */
- NESeparableConvolutionHorKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- NESeparableConvolutionHorKernel(const NESeparableConvolutionHorKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- NESeparableConvolutionHorKernel &operator=(const NESeparableConvolutionHorKernel &) = delete;
- /** Allow instances of this class to be moved */
- NESeparableConvolutionHorKernel(NESeparableConvolutionHorKernel &&) = default;
- /** Allow instances of this class to be moved */
- NESeparableConvolutionHorKernel &operator=(NESeparableConvolutionHorKernel &&) = default;
- /** Default destructor */
- ~NESeparableConvolutionHorKernel() = default;
-
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] input Source tensor. Data type supported: U8.
- * @param[out] output Destination tensor. Data types supported: U16, S16, S32.
- * @param[in] conv_row Convolution matrix to apply to the input tensor.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ITensor *input, ITensor *output, const int16_t *conv_row, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- /** Apply the object's convolution to the given window of the input tensor..
- *
- * @param[in] window Window to apply the convolution on.
- */
- template <typename OutputType>
- void convolve(const Window &window);
-
- std::array<int16_t, matrix_size> _conv_row; /**< Convolution coefficients */
- BorderSize _border_size; /**< Border size */
-};
-
-/** Interface for the kernel which applied a 5x1 horizontal convolution to a tensor.*/
-using NESeparableConvolution5x5HorKernel = NESeparableConvolutionHorKernel<5>;
-/** Interface for the kernel which applied a 7x1 horizontal convolution to a tensor.*/
-using NESeparableConvolution7x7HorKernel = NESeparableConvolutionHorKernel<7>;
-/** Interface for the kernel which applied a 9x1 horizontal convolution to a tensor.*/
-using NESeparableConvolution9x9HorKernel = NESeparableConvolutionHorKernel<9>;
-
-/** Kernel for the Vertical pass of a Separable Convolution */
-template <unsigned int matrix_size>
-class NESeparableConvolutionVertKernel : public INESimpleKernel
-{
-public:
- const char *name() const override
- {
- return "NESeparableConvolutionVertKernel";
- }
- /** Default constructor */
- NESeparableConvolutionVertKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- NESeparableConvolutionVertKernel(const NESeparableConvolutionVertKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- NESeparableConvolutionVertKernel &operator=(const NESeparableConvolutionVertKernel &) = delete;
- /** Allow instances of this class to be moved */
- NESeparableConvolutionVertKernel(NESeparableConvolutionVertKernel &&) = default;
- /** Allow instances of this class to be moved */
- NESeparableConvolutionVertKernel &operator=(NESeparableConvolutionVertKernel &&) = default;
- /** Default destructor */
- ~NESeparableConvolutionVertKernel() = default;
-
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] input Source tensor. Data type supported: U16, S16, S32.
- * @param[out] output Destination tensor, Data types supported: U8, S16.
- * @param[in] conv_col Convolution matrix to apply to the input tensor.
- * @param[in] scale Scale of the convolution matrix
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ITensor *input, ITensor *output, const int16_t *conv_col, uint32_t scale, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- /** Apply the object's convolution to the given window of the input tensor.
- * This function is used if the intermediate values have been stored as U16.
- *
- * @param[in] win Window to apply the convolution on.
- */
- template <typename OutputType>
- void convolution_u16(const Window &win);
- /** Apply the object's convolution to the given window of the input tensor.
- * This function is used if the intermediate values have been stored as S16.
- *
- * @param[in] win Window to apply the convolution on.
- */
- template <typename OutputType>
- void convolution_s16(const Window &win);
- /** Apply the object's convolution to the given window of the input tensor.
- * This function is used if the intermediate values have been stored as S32.
- *
- * @param[in] win Window to apply the convolution on.
- */
- template <typename OutputType>
- void convolution_s32(const Window &win);
-
- std::array<int16_t, matrix_size> _conv_col; /**< Convolution coefficients */
- uint32_t _scale; /**< Convolution's scale */
-};
-
-/** Interface for the kernel which applied a 1x5 vertical convolution to a tensor.*/
-using NESeparableConvolution5x5VertKernel = NESeparableConvolutionVertKernel<5>;
-/** Interface for the kernel which applied a 1x7 vertical convolution to a tensor.*/
-using NESeparableConvolution7x7VertKernel = NESeparableConvolutionVertKernel<7>;
-/** Interface for the kernel which applied a 1x9 vertical convolution to a tensor.*/
-using NESeparableConvolution9x9VertKernel = NESeparableConvolutionVertKernel<9>;
-
-/****************************************************************************************\
- * Rectangle Convolution *
-\****************************************************************************************/
-
-/** Kernel for the running convolution on a rectangle matrix.
- *
- * @note Supports combinations of 3,5,7 and 9.
- */
-class NEConvolutionRectangleKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEConvolutionRectangleKernel";
- }
- /** Default constructor */
- NEConvolutionRectangleKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEConvolutionRectangleKernel(NEConvolutionRectangleKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEConvolutionRectangleKernel &operator=(NEConvolutionRectangleKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEConvolutionRectangleKernel(NEConvolutionRectangleKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEConvolutionRectangleKernel &operator=(NEConvolutionRectangleKernel &&) = default;
- /** Default destructor */
- ~NEConvolutionRectangleKernel() = default;
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] input Source tensor. Data type supported: U8.
- * @param[out] output Destination tensor, Data types supported: U8, S16.
- * @param[in] conv Convolution matrix to apply to the input tensor.
- * @param[in] width Width of convolution matrix (Number of columns)
- * @param[in] height Height of convolution matrix (Number of rows)
- * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ITensor *input, ITensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- unsigned int get_index(uint32_t val);
- /** Apply the object's convolution to the given window of the input tensor.
- *
- * @param[in] win Window to apply the convolution on.
- */
- template <typename OutputType, unsigned int rows, unsigned int cols>
- void convolution(const Window &win);
-
-protected:
- const ITensor *_input; /**< Input tensor */
- ITensor *_output; /**< Output tensor */
- uint32_t _scale; /**< Scale of the convolution */
- std::vector<int16_t> _convolution; /**< Convolution matrix */
- BorderSize _border_size; /**< Calculated border width */
- uint32_t _func_idx; /**< Index used to specify convolution function to be used */
- const static unsigned int _nr_supported_sizes
- {
- 4
- }; /**< Number of supported permutations */
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NECONVOLUTIONKERNEL_H */
diff --git a/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp b/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp
deleted file mode 100644
index 9f5dfcdcdb..0000000000
--- a/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp
+++ /dev/null
@@ -1,516 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-#include "src/core/helpers/AutoConfiguration.h"
-#include "src/core/helpers/WindowHelpers.h"
-
-#include <arm_neon.h>
-#include <cstddef>
-
-using namespace arm_compute;
-
-namespace arm_compute
-{
-class Coordinates;
-} // namespace arm_compute
-
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-namespace fp16
-{
-inline void mask_top(const float16x8_t &vc, const float16x8_t &in0, const float16x8_t &in1, uint16x8_t &mask)
-{
- // vc > nc.val[0], vc > nc.val[1], vc > nc.val[2]
- mask = vandq_u16(mask, vcgeq_f16(vc, in0));
- mask = vandq_u16(mask, vcgeq_f16(vc, vextq_f16(in0, in1, 1)));
- mask = vandq_u16(mask, vcgeq_f16(vc, vextq_f16(in0, in1, 2)));
-}
-
-inline void mask_middle(const float16x8_t &vc, const float16x8_t &in0, const float16x8_t &in1, uint16x8_t &mask)
-{
- // vc >= nc.val[0], vc > nc.val[2]
- mask = vandq_u16(mask, vcgeq_f16(vc, in0));
- mask = vandq_u16(mask, vcgtq_f16(vc, vextq_f16(in0, in1, 2)));
-}
-
-inline void mask_bottom(const float16x8_t &vc, const float16x8_t &in0, const float16x8_t &in1, uint16x8_t &mask)
-{
- // vc > nc.val[0], vc > nc.val[1], vc > nc.val[2]
- mask = vandq_u16(mask, vcgtq_f16(vc, in0));
- mask = vandq_u16(mask, vcgtq_f16(vc, vextq_f16(in0, in1, 1)));
- mask = vandq_u16(mask, vcgtq_f16(vc, vextq_f16(in0, in1, 2)));
-}
-
-inline void non_maxima_suppression3x3_F32_F32(const void *__restrict in_ptr, void *__restrict out_ptr, const uint32_t in_stride)
-{
- auto in = static_cast<const float *__restrict>(in_ptr) - 1;
- const auto out = static_cast<float *__restrict>(out_ptr);
-
- // Get centre scores
- const float16x8x2_t vc =
- {
- vcombine_f16(vcvt_f16_f32(vld1q_f32(in + 1)), vcvt_f16_f32(vld1q_f32(in + 5))),
- vcombine_f16(vcvt_f16_f32(vld1q_f32(in + 9)), vcvt_f16_f32(vld1q_f32(in + 13)))
- };
-
- // Neighboring pixels
- in -= in_stride;
-
- static const float16x4_t zero_f16x4 = vdup_n_f16(0);
- static const uint16x8_t zero_u16 = vdupq_n_u16(0);
- static const uint16x8_t true_mask = vceqq_u16(zero_u16, zero_u16);
- static const uint16x8x2_t true_mask_x2 =
- {
- true_mask,
- true_mask
- };
-
- uint16x8x2_t mask = true_mask_x2;
-
- // Top row
- const float16x8_t tmp_top0 = vcombine_f16(vcvt_f16_f32(vld1q_f32(in)), vcvt_f16_f32(vld1q_f32(in + 4)));
- const float16x8_t tmp_top1 = vcombine_f16(vcvt_f16_f32(vld1q_f32(in + 8)), vcvt_f16_f32(vld1q_f32(in + 12)));
- const float16x8_t tmp_top2 = vcombine_f16(vcvt_f16_f32(vld1q_f32(in + 16)), zero_f16x4);
-
- // vc >= nc.val[0], vc >= nc.val[1], vc >= nc.val[2]
- mask_top(vc.val[0], tmp_top0, tmp_top1, mask.val[0]);
- mask_top(vc.val[1], tmp_top1, tmp_top2, mask.val[1]);
-
- in += in_stride;
-
- // Middle row
- const float16x8_t tmp_mid0 = vcombine_f16(vcvt_f16_f32(vld1q_f32(in)), vcvt_f16_f32(vld1q_f32(in + 4)));
- const float16x8_t tmp_mid1 = vcombine_f16(vcvt_f16_f32(vld1q_f32(in + 8)), vcvt_f16_f32(vld1q_f32(in + 12)));
- const float16x8_t tmp_mid2 = vcombine_f16(vcvt_f16_f32(vld1q_f32(in + 16)), zero_f16x4);
-
- // vc >= nc.val[0], vc > nc.val[2]
- mask_middle(vc.val[0], tmp_mid0, tmp_mid1, mask.val[0]);
- mask_middle(vc.val[1], tmp_mid1, tmp_mid2, mask.val[1]);
-
- in += in_stride;
-
- // Bottom row
- const float16x8_t tmp_bot0 = vcombine_f16(vcvt_f16_f32(vld1q_f32(in)), vcvt_f16_f32(vld1q_f32(in + 4)));
- const float16x8_t tmp_bot1 = vcombine_f16(vcvt_f16_f32(vld1q_f32(in + 8)), vcvt_f16_f32(vld1q_f32(in + 12)));
- const float16x8_t tmp_bot2 = vcombine_f16(vcvt_f16_f32(vld1q_f32(in + 16)), zero_f16x4);
-
- // vc > nc.val[0], vc > nc.val[1], vc > nc.val[2]
- mask_bottom(vc.val[0], tmp_bot0, tmp_bot1, mask.val[0]);
- mask_bottom(vc.val[1], tmp_bot1, tmp_bot2, mask.val[1]);
-
- // Store
- static const float16x8_t zero_f16x8 = vdupq_n_f16(0);
-
- const float16x8_t suppressed0 = vbslq_f16(mask.val[0], vc.val[0], zero_f16x8);
- vst1q_f32(out + 0, vcvt_f32_f16(vget_low_f16(suppressed0)));
- vst1q_f32(out + 4, vcvt_f32_f16(vget_high_f16(suppressed0)));
-
- const float16x8_t suppressed1 = vbslq_f16(mask.val[1], vc.val[1], zero_f16x8);
- vst1q_f32(out + 8, vcvt_f32_f16(vget_low_f16(suppressed1)));
- vst1q_f32(out + 12, vcvt_f32_f16(vget_high_f16(suppressed1)));
-}
-
-inline void non_maxima_suppression3x3_U8_U8(const void *__restrict in_ptr, void *__restrict out_ptr, const uint32_t in_stride)
-{
- auto in = static_cast<const uint8_t *__restrict>(in_ptr) - 1;
- const auto out = static_cast<uint8_t *__restrict>(out_ptr);
-
- // Get centre scores
- const uint8x16_t vc = vld1q_u8(in + 1);
-
- // Neighboring pixels
- in -= in_stride;
-
- // Top row
- const uint8x16_t l_nc_0 = vld1q_u8(in);
- const uint8x16_t m_nc_0 = vld1q_u8(in + 1);
- const uint8x16_t r_nc_0 = vld1q_u8(in + 2);
-
- // Keep center scores if ...
- // vc >= l_nc_0, vc >= m_nc_0, vc >= r_nc_0
- uint8x16_t mask = vcgeq_u8(vc, l_nc_0);
- mask = vandq_u8(mask, vcgeq_u8(vc, m_nc_0));
- mask = vandq_u8(mask, vcgeq_u8(vc, r_nc_0));
-
- in += in_stride;
-
- // Middle row
- const uint8x16_t l_nc_1 = vld1q_u8(in);
- const uint8x16_t r_nc_1 = vld1q_u8(in + 2);
-
- // ... and ...
- // vc >= l_nc_1, vc > r_nc_1
- mask = vandq_u8(mask, vcgeq_u8(vc, l_nc_1));
- mask = vandq_u8(mask, vcgtq_u8(vc, r_nc_1));
-
- in += in_stride;
-
- // Bottom row
- const uint8x16_t l_nc_2 = vld1q_u8(in);
- const uint8x16_t m_nc_2 = vld1q_u8(in + 1);
- const uint8x16_t r_nc_2 = vld1q_u8(in + 2);
-
- // ... and ...
- // vc > l_nc_2, vc > m_nc_2, vc > r_nc_2
- mask = vandq_u8(mask, vcgtq_u8(vc, l_nc_2));
- mask = vandq_u8(mask, vcgtq_u8(vc, m_nc_2));
- mask = vandq_u8(mask, vcgtq_u8(vc, r_nc_2));
-
- // Store
- static const uint8x16_t zero = vdupq_n_u8(0);
- vst1q_u8(out, vbslq_u8(mask, vc, zero));
-}
-} // namespace fp16
-
-void NENonMaximaSuppression3x3FP16Kernel::configure(const ITensor *input, ITensor *output, bool border_undefined)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::F32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::F32);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
-
- _input = input;
- _output = output;
-
- switch(input->info()->data_type())
- {
- case DataType::U8:
- _func = &fp16::non_maxima_suppression3x3_U8_U8;
- break;
- default:
- _func = &fp16::non_maxima_suppression3x3_F32_F32;
- break;
- }
-
- constexpr unsigned int num_elems_processed_per_iteration = 16;
- const unsigned int num_elems_read_per_iteration = 16 + 2 * border_size().left + (input->info()->data_type() == DataType::U8 ? 0 : 3);
- constexpr unsigned int num_elems_written_per_iteration = 16;
- constexpr unsigned int num_rows_read_per_iteration = 3;
-
- // Configure kernel window
- Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size());
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration);
-
- update_window_and_padding(win,
- AccessWindowRectangle(input->info(), -border_size().left, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration),
- output_access);
-
- output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
-
- INEKernel::configure(win);
-}
-#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-
-namespace
-{
-inline void non_maxima_suppression3x3_FLOAT_FLOAT(const void *__restrict input_ptr, void *__restrict output_ptr, const uint32_t input_stride)
-{
- auto input = static_cast<const float *__restrict>(input_ptr) - 1;
- const auto output = static_cast<float *__restrict>(output_ptr);
-
- // Get centre scores
- const float32x4x4_t vc =
- {
- {
- vld1q_f32(input + 1),
- vld1q_f32(input + 5),
- vld1q_f32(input + 9),
- vld1q_f32(input + 13)
- }
- };
-
- // Neighboring pixels
- float32x4x4_t l_nc{ {} };
- float32x4x4_t m_nc{ {} };
- float32x4x4_t r_nc{ {} };
-
- input -= input_stride;
-
- // Row0 - Low part
- float32x4_t tmp_low = vld1q_f32(input);
- float32x4_t tmp_high = vld1q_f32(input + 4);
- float32x4_t tmp_high1 = vld1q_f32(input + 8);
-
- l_nc.val[0] = tmp_low;
- m_nc.val[0] = vextq_f32(tmp_low, tmp_high, 1);
- r_nc.val[0] = vextq_f32(tmp_low, tmp_high, 2);
-
- tmp_low = tmp_high;
- tmp_high = tmp_high1;
-
- l_nc.val[1] = tmp_low;
- m_nc.val[1] = vextq_f32(tmp_low, tmp_high, 1);
- r_nc.val[1] = vextq_f32(tmp_low, tmp_high, 2);
-
- // Row0 - High part
- tmp_low = tmp_high1;
- tmp_high = vld1q_f32(input + 12);
- tmp_high1 = vld1q_f32(input + 16);
-
- l_nc.val[2] = tmp_low;
- m_nc.val[2] = vextq_f32(tmp_low, tmp_high, 1);
- r_nc.val[2] = vextq_f32(tmp_low, tmp_high, 2);
-
- tmp_low = tmp_high;
- tmp_high = tmp_high1;
-
- l_nc.val[3] = tmp_low;
- m_nc.val[3] = vextq_f32(tmp_low, tmp_high, 1);
- r_nc.val[3] = vextq_f32(tmp_low, tmp_high, 2);
-
- // mc >= nc.val[0], mc >= nc.val[1], mc >= nc.val[2]
- uint32x4x4_t mask{ {} };
- mask.val[0] = vcgeq_f32(vc.val[0], l_nc.val[0]);
- mask.val[0] = vandq_u32(mask.val[0], vcgeq_f32(vc.val[0], m_nc.val[0]));
- mask.val[0] = vandq_u32(mask.val[0], vcgeq_f32(vc.val[0], r_nc.val[0]));
- mask.val[1] = vcgeq_f32(vc.val[1], l_nc.val[1]);
- mask.val[1] = vandq_u32(mask.val[1], vcgeq_f32(vc.val[1], m_nc.val[1]));
- mask.val[1] = vandq_u32(mask.val[1], vcgeq_f32(vc.val[1], r_nc.val[1]));
- mask.val[2] = vcgeq_f32(vc.val[2], l_nc.val[2]);
- mask.val[2] = vandq_u32(mask.val[2], vcgeq_f32(vc.val[2], m_nc.val[2]));
- mask.val[2] = vandq_u32(mask.val[2], vcgeq_f32(vc.val[2], r_nc.val[2]));
- mask.val[3] = vcgeq_f32(vc.val[3], l_nc.val[3]);
- mask.val[3] = vandq_u32(mask.val[3], vcgeq_f32(vc.val[3], m_nc.val[3]));
- mask.val[3] = vandq_u32(mask.val[3], vcgeq_f32(vc.val[3], r_nc.val[3]));
-
- input += input_stride;
-
- // Row1 - Low part
- tmp_low = vld1q_f32(input);
- tmp_high = vld1q_f32(input + 4);
- tmp_high1 = vld1q_f32(input + 8);
-
- l_nc.val[0] = tmp_low;
- r_nc.val[0] = vextq_f32(tmp_low, tmp_high, 2);
-
- tmp_low = tmp_high;
- tmp_high = tmp_high1;
-
- l_nc.val[1] = tmp_low;
- r_nc.val[1] = vextq_f32(tmp_low, tmp_high, 2);
-
- // Row1 - High part
- tmp_low = tmp_high1;
- tmp_high = vld1q_f32(input + 12);
- tmp_high1 = vld1q_f32(input + 16);
-
- l_nc.val[2] = tmp_low;
- r_nc.val[2] = vextq_f32(tmp_low, tmp_high, 2);
-
- tmp_low = tmp_high;
- tmp_high = tmp_high1;
-
- l_nc.val[3] = tmp_low;
- r_nc.val[3] = vextq_f32(tmp_low, tmp_high, 2);
-
- // mc >= nc.val[0], mc > nc.val[2]
- mask.val[0] = vandq_u32(mask.val[0], vcgeq_f32(vc.val[0], l_nc.val[0]));
- mask.val[0] = vandq_u32(mask.val[0], vcgtq_f32(vc.val[0], r_nc.val[0]));
- mask.val[1] = vandq_u32(mask.val[1], vcgeq_f32(vc.val[1], l_nc.val[1]));
- mask.val[1] = vandq_u32(mask.val[1], vcgtq_f32(vc.val[1], r_nc.val[1]));
- mask.val[2] = vandq_u32(mask.val[2], vcgeq_f32(vc.val[2], l_nc.val[2]));
- mask.val[2] = vandq_u32(mask.val[2], vcgtq_f32(vc.val[2], r_nc.val[2]));
- mask.val[3] = vandq_u32(mask.val[3], vcgeq_f32(vc.val[3], l_nc.val[3]));
- mask.val[3] = vandq_u32(mask.val[3], vcgtq_f32(vc.val[3], r_nc.val[3]));
-
- input += input_stride;
-
- // Row2 - Low part
- tmp_low = vld1q_f32(input);
- tmp_high = vld1q_f32(input + 4);
- tmp_high1 = vld1q_f32(input + 8);
-
- l_nc.val[0] = tmp_low;
- m_nc.val[0] = vextq_f32(tmp_low, tmp_high, 1);
- r_nc.val[0] = vextq_f32(tmp_low, tmp_high, 2);
-
- tmp_low = tmp_high;
- tmp_high = tmp_high1;
-
- l_nc.val[1] = tmp_low;
- m_nc.val[1] = vextq_f32(tmp_low, tmp_high, 1);
- r_nc.val[1] = vextq_f32(tmp_low, tmp_high, 2);
-
- // Row2 - High part
- tmp_low = tmp_high1;
- tmp_high = vld1q_f32(input + 12);
- tmp_high1 = vld1q_f32(input + 16);
-
- l_nc.val[2] = tmp_low;
- m_nc.val[2] = vextq_f32(tmp_low, tmp_high, 1);
- r_nc.val[2] = vextq_f32(tmp_low, tmp_high, 2);
-
- tmp_low = tmp_high;
- tmp_high = tmp_high1;
-
- l_nc.val[3] = tmp_low;
- m_nc.val[3] = vextq_f32(tmp_low, tmp_high, 1);
- r_nc.val[3] = vextq_f32(tmp_low, tmp_high, 2);
-
- // mc > nc.val[0], mc > nc.val[1], mc > nc.val[2]
- mask.val[0] = vandq_u32(mask.val[0], vcgtq_f32(vc.val[0], l_nc.val[0]));
- mask.val[0] = vandq_u32(mask.val[0], vcgtq_f32(vc.val[0], m_nc.val[0]));
- mask.val[0] = vandq_u32(mask.val[0], vcgtq_f32(vc.val[0], r_nc.val[0]));
- mask.val[1] = vandq_u32(mask.val[1], vcgtq_f32(vc.val[1], l_nc.val[1]));
- mask.val[1] = vandq_u32(mask.val[1], vcgtq_f32(vc.val[1], m_nc.val[1]));
- mask.val[1] = vandq_u32(mask.val[1], vcgtq_f32(vc.val[1], r_nc.val[1]));
- mask.val[2] = vandq_u32(mask.val[2], vcgtq_f32(vc.val[2], l_nc.val[2]));
- mask.val[2] = vandq_u32(mask.val[2], vcgtq_f32(vc.val[2], m_nc.val[2]));
- mask.val[2] = vandq_u32(mask.val[2], vcgtq_f32(vc.val[2], r_nc.val[2]));
- mask.val[3] = vandq_u32(mask.val[3], vcgtq_f32(vc.val[3], l_nc.val[3]));
- mask.val[3] = vandq_u32(mask.val[3], vcgtq_f32(vc.val[3], m_nc.val[3]));
- mask.val[3] = vandq_u32(mask.val[3], vcgtq_f32(vc.val[3], r_nc.val[3]));
-
- static const float32x4_t zero = vdupq_n_f32(0.f);
-
- // Store
- vst1q_f32(output + 0, vbslq_f32(mask.val[0], vc.val[0], zero));
- vst1q_f32(output + 4, vbslq_f32(mask.val[1], vc.val[1], zero));
- vst1q_f32(output + 8, vbslq_f32(mask.val[2], vc.val[2], zero));
- vst1q_f32(output + 12, vbslq_f32(mask.val[3], vc.val[3], zero));
-}
-
-inline void non_maxima_suppression3x3_U8_U8(const void *__restrict input_ptr, void *__restrict output_ptr, const uint32_t input_stride)
-{
- auto input = static_cast<const uint8_t *__restrict>(input_ptr) - 1;
- const auto output = static_cast<uint8_t *__restrict>(output_ptr);
-
- // Get centre scores
- const uint8x16_t vc = vld1q_u8(input + 1);
-
- // Neighboring pixels
- uint8x16_t l_nc{};
- uint8x16_t m_nc{};
- uint8x16_t r_nc{};
-
- input -= input_stride;
-
- // Row0
- l_nc = vld1q_u8(input);
- m_nc = vld1q_u8(input + 1);
- r_nc = vld1q_u8(input + 2);
-
- // mc >= l_nc, mc >= m_nc, mc >= r_nc
- uint8x16_t mask = vcgeq_u8(vc, l_nc);
- mask = vandq_u8(mask, vcgeq_u8(vc, m_nc));
- mask = vandq_u8(mask, vcgeq_u8(vc, r_nc));
-
- input += input_stride;
-
- // Row1
- l_nc = vld1q_u8(input);
- r_nc = vld1q_u8(input + 2);
-
- // mc >= l_nc, mc > r_nc
- mask = vandq_u8(mask, vcgeq_u8(vc, l_nc));
- mask = vandq_u8(mask, vcgtq_u8(vc, r_nc));
-
- input += input_stride;
-
- // Row2
- l_nc = vld1q_u8(input);
- m_nc = vld1q_u8(input + 1);
- r_nc = vld1q_u8(input + 2);
-
- // mc > l_nc, mc > m_nc, mc > r_nc
- mask = vandq_u8(mask, vcgtq_u8(vc, l_nc));
- mask = vandq_u8(mask, vcgtq_u8(vc, m_nc));
- mask = vandq_u8(mask, vcgtq_u8(vc, r_nc));
-
- static const uint8x16_t zero = vdupq_n_u8(0);
-
- // Store
- vst1q_u8(output, vbslq_u8(mask, vc, zero));
-}
-} // namespace
-
-NENonMaximaSuppression3x3Kernel::NENonMaximaSuppression3x3Kernel()
- : _func(nullptr), _input(nullptr), _output(nullptr)
-{
-}
-
-BorderSize NENonMaximaSuppression3x3Kernel::border_size() const
-{
- return BorderSize(1);
-}
-
-void NENonMaximaSuppression3x3Kernel::configure(const ITensor *input, ITensor *output, bool border_undefined)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::F32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::F32);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
-
- _input = input;
- _output = output;
-
- if(input->info()->data_type() == DataType::U8)
- {
- _func = &non_maxima_suppression3x3_U8_U8;
- }
- else
- {
- _func = &non_maxima_suppression3x3_FLOAT_FLOAT;
- }
-
- constexpr unsigned int num_elems_processed_per_iteration = 16;
- const unsigned int num_elems_read_per_iteration = 16 + 2 * border_size().left + (input->info()->data_type() == DataType::U8 ? 0 : 3);
- constexpr unsigned int num_elems_written_per_iteration = 16;
- constexpr unsigned int num_rows_read_per_iteration = 3;
-
- // Configure kernel window
- Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size());
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration);
-
- update_window_and_padding(win,
- AccessWindowRectangle(input->info(), -border_size().left, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration),
- output_access);
-
- output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
-
- INEKernel::configure(win);
-}
-
-void NENonMaximaSuppression3x3Kernel::run(const Window &window, const ThreadInfo &info)
-{
- ARM_COMPUTE_UNUSED(info);
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
- ARM_COMPUTE_ERROR_ON(_func == nullptr);
- Iterator input(_input, window);
- Iterator output(_output, window);
-
- const size_t input_stride = _input->info()->strides_in_bytes()[1] / element_size_from_data_type(_input->info()->data_type());
-
- execute_window_loop(window, [&](const Coordinates &)
- {
- _func(input.ptr(), output.ptr(), input_stride);
- },
- input, output);
-}
diff --git a/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h b/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h
deleted file mode 100644
index 4194dac68e..0000000000
--- a/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright (c) 2016-2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NENONMAXIMASUPPRESSION3x3KERNEL_H
-#define ARM_COMPUTE_NENONMAXIMASUPPRESSION3x3KERNEL_H
-
-#include "src/core/NEON/INEKernel.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface to perform Non-Maxima suppression over a 3x3 window using Neon
- *
- */
-class NENonMaximaSuppression3x3Kernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NENonMaximaSuppression3x3Kernel";
- }
- /** Default constructor */
- NENonMaximaSuppression3x3Kernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NENonMaximaSuppression3x3Kernel(const NENonMaximaSuppression3x3Kernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NENonMaximaSuppression3x3Kernel &operator=(const NENonMaximaSuppression3x3Kernel &) = delete;
- /** Allow instances of this class to be moved */
- NENonMaximaSuppression3x3Kernel(NENonMaximaSuppression3x3Kernel &&) = default;
- /** Allow instances of this class to be moved */
- NENonMaximaSuppression3x3Kernel &operator=(NENonMaximaSuppression3x3Kernel &&) = default;
- /** Default destructor */
- ~NENonMaximaSuppression3x3Kernel() = default;
-
- /** Initialise the kernel's sources, destinations and border mode.
- *
- * @param[in] input Source tensor. Data types supported: U8/F32
- * @param[out] output Destination tensor. Data types supported: same as @p input
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ITensor *input, ITensor *output, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-protected:
- /** Common signature for all the specialised non-maxima suppression 3x3 functions
- *
- * @param[in] input_ptr Pointer to the input tensor.
- * @param[out] output_ptr Pointer to the output tensor
- * @param[in] input_stride Stride of the input tensor
- */
- using NonMaxSuppr3x3Function = void(const void *__restrict input_ptr, void *__restrict output_ptr, const uint32_t input_stride);
-
- NonMaxSuppr3x3Function *_func; /**< Non-Maxima suppression function to use for the particular tensor types passed to configure() */
- const ITensor *_input; /**< Source tensor */
- ITensor *_output; /**< Destination tensor */
-};
-
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-/** Neon kernel to perform Non-Maxima suppression 3x3 with intermediate results in FP16 if the input data type is FP32
- */
-class NENonMaximaSuppression3x3FP16Kernel : public NENonMaximaSuppression3x3Kernel
-{
-public:
- const char *name() const override
- {
- return "NENonMaximaSuppression3x3FP16Kernel";
- }
- /** Initialise the kernel's sources, destinations and border mode.
- *
- * @param[in] input Source tensor. Data types supported: U8/F32.
- * @param[out] output Destination tensor. Data types supported: same as @p input
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ITensor *input, ITensor *output, bool border_undefined);
-};
-#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-/** Neon kernel to perform Non-Maxima suppression 3x3 with intermediate results in FP16 if the input data type is FP32 */
-using NENonMaximaSuppression3x3FP16Kernel = NENonMaximaSuppression3x3Kernel;
-#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-} // namespace arm_compute
-#endif /* _ARM_COMPUTE_NENONMAXIMASUPPRESSION3x3KERNEL_H */
diff --git a/src/core/NEON/kernels/NERemapKernel.cpp b/src/core/NEON/kernels/NERemapKernel.cpp
new file mode 100644
index 0000000000..24d0dd82e8
--- /dev/null
+++ b/src/core/NEON/kernels/NERemapKernel.cpp
@@ -0,0 +1,237 @@
+/*
+ * Copyright (c) 2017-2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/core/NEON/kernels/NERemapKernel.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/Window.h"
+#include "src/core/AccessWindowStatic.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/ScaleHelpers.h"
+#include "src/core/helpers/WindowHelpers.h"
+
+#include <arm_neon.h>
+#include <cstddef>
+#include <cstdint>
+
+using namespace arm_compute;
+
+namespace arm_compute
+{
+class Coordinates;
+} // namespace arm_compute
+
+namespace
+{
+inline int32x4_t offset_nearest_interpolation(const float *mapx_ptr, const float *mapy_ptr, const float32x4_t &width, const float32x4_t &height, const int32x4_t &stride)
+{
+ const float32x4_t lowerxy = vdupq_n_f32(-1.f);
+
+ float32x4_t x = vld1q_f32(mapx_ptr);
+ float32x4_t y = vld1q_f32(mapy_ptr);
+
+ // Clamp x to [-1, width] and y to [-1, height]
+ x = vmaxq_f32(lowerxy, vminq_f32(x, width));
+ y = vmaxq_f32(lowerxy, vminq_f32(y, height));
+
+ const int32x4_t x_s32 = vcvtq_s32_f32(x);
+ const int32x4_t y_s32 = vcvtq_s32_f32(y);
+
+ return vmlaq_s32(x_s32, y_s32, stride);
+}
+
+} // namespace
+
+NERemapKernel::NERemapKernel()
+ : _func(nullptr), _input(nullptr), _output(nullptr), _map_x(nullptr), _map_y(nullptr)
+{
+}
+
+BorderSize NERemapKernel::border_size() const
+{
+ return BorderSize(1);
+}
+
+void NERemapKernel::configure(const ITensor *input, const ITensor *map_x, const ITensor *map_y, ITensor *output, InterpolationPolicy policy)
+{
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(map_x, 1, DataType::F32);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(map_y, 1, DataType::F32);
+
+ _input = input;
+ _output = output;
+ _map_x = map_x;
+ _map_y = map_y;
+
+ switch(policy)
+ {
+ case InterpolationPolicy::NEAREST_NEIGHBOR:
+ {
+ _func = &NERemapKernel::remap_nearest;
+ break;
+ }
+ case InterpolationPolicy::BILINEAR:
+ {
+ _func = &NERemapKernel::remap_bilinear;
+ break;
+ }
+ default:
+ ARM_COMPUTE_ERROR("Unsupported interpolation mode");
+ break;
+ }
+
+ constexpr unsigned int num_elems_processed_per_iteration = 16;
+
+ // Configure kernel window
+ Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration));
+
+ const int total_right = ceil_to_multiple(input->info()->dimension(0), num_elems_processed_per_iteration);
+ const int access_right = total_right + (((total_right - input->info()->dimension(0)) == 0) ? border_size().right : 0);
+
+ AccessWindowStatic input_access(input->info(), -border_size().left, -border_size().top, access_right, input->info()->dimension(1) + border_size().bottom);
+
+ AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
+ AccessWindowHorizontal mapx_access(map_x->info(), 0, num_elems_processed_per_iteration);
+ AccessWindowHorizontal mapy_access(map_y->info(), 0, num_elems_processed_per_iteration);
+
+ update_window_and_padding(win, input_access, mapx_access, mapy_access, output_access);
+
+ output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
+
+ INEKernel::configure(win);
+}
+
+void NERemapKernel::remap_nearest(const Window &window)
+{
+ // Don't increment in X and Y direction for the input tensor
+ // A pointer to the start of this plane is needed as base for the precomputed offsets
+ Window win_in(window);
+ win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
+ win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
+
+ Iterator in(_input, win_in);
+ Iterator out(_output, window);
+ Iterator mapx(_map_x, window);
+ Iterator mapy(_map_y, window);
+
+ const float32x4_t width = vdupq_n_f32(static_cast<float>(_input->info()->dimension(0)));
+ const float32x4_t height = vdupq_n_f32(static_cast<float>(_input->info()->dimension(1)));
+ const int32x4_t in_stride = vdupq_n_s32(static_cast<int32_t>(_input->info()->strides_in_bytes()[1]));
+
+ execute_window_loop(window, [&](const Coordinates &)
+ {
+ const auto mapx_ptr = reinterpret_cast<const float *>(mapx.ptr());
+ const auto mapy_ptr = reinterpret_cast<const float *>(mapy.ptr());
+ const uint8_t *in_ptr = in.ptr();
+
+ const int32x4_t offset0 = offset_nearest_interpolation(mapx_ptr + 0, mapy_ptr + 0, width, height, in_stride);
+ const int32x4_t offset1 = offset_nearest_interpolation(mapx_ptr + 4, mapy_ptr + 4, width, height, in_stride);
+ const int32x4_t offset2 = offset_nearest_interpolation(mapx_ptr + 8, mapy_ptr + 8, width, height, in_stride);
+ const int32x4_t offset3 = offset_nearest_interpolation(mapx_ptr + 12, mapy_ptr + 12, width, height, in_stride);
+
+ uint8x16_t tmp = vdupq_n_u8(0);
+ tmp = vsetq_lane_u8(in_ptr[vgetq_lane_s32(offset0, 0)], tmp, 0);
+ tmp = vsetq_lane_u8(in_ptr[vgetq_lane_s32(offset0, 1)], tmp, 1);
+ tmp = vsetq_lane_u8(in_ptr[vgetq_lane_s32(offset0, 2)], tmp, 2);
+ tmp = vsetq_lane_u8(in_ptr[vgetq_lane_s32(offset0, 3)], tmp, 3);
+ tmp = vsetq_lane_u8(in_ptr[vgetq_lane_s32(offset1, 0)], tmp, 4);
+ tmp = vsetq_lane_u8(in_ptr[vgetq_lane_s32(offset1, 1)], tmp, 5);
+ tmp = vsetq_lane_u8(in_ptr[vgetq_lane_s32(offset1, 2)], tmp, 6);
+ tmp = vsetq_lane_u8(in_ptr[vgetq_lane_s32(offset1, 3)], tmp, 7);
+ tmp = vsetq_lane_u8(in_ptr[vgetq_lane_s32(offset2, 0)], tmp, 8);
+ tmp = vsetq_lane_u8(in_ptr[vgetq_lane_s32(offset2, 1)], tmp, 9);
+ tmp = vsetq_lane_u8(in_ptr[vgetq_lane_s32(offset2, 2)], tmp, 10);
+ tmp = vsetq_lane_u8(in_ptr[vgetq_lane_s32(offset2, 3)], tmp, 11);
+ tmp = vsetq_lane_u8(in_ptr[vgetq_lane_s32(offset3, 0)], tmp, 12);
+ tmp = vsetq_lane_u8(in_ptr[vgetq_lane_s32(offset3, 1)], tmp, 13);
+ tmp = vsetq_lane_u8(in_ptr[vgetq_lane_s32(offset3, 2)], tmp, 14);
+ tmp = vsetq_lane_u8(in_ptr[vgetq_lane_s32(offset3, 3)], tmp, 15);
+ vst1q_u8(out.ptr(), tmp);
+ },
+ in, out, mapx, mapy);
+}
+
+void NERemapKernel::remap_bilinear(const Window &window)
+{
+ using namespace scale_helpers;
+
+ // Don't increment in X and Y direction for the input tensor
+ // A pointer to the start of this plane is needed as base for the precomputed offsets
+ Window win_in(window);
+ win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
+ win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
+
+ Iterator in(_input, win_in);
+ Iterator out(_output, window);
+ Iterator mapx(_map_x, window);
+ Iterator mapy(_map_y, window);
+
+ const size_t width = _input->info()->dimension(0);
+ const size_t height = _input->info()->dimension(1);
+ const size_t in_stride = _input->info()->strides_in_bytes()[1];
+
+ execute_window_loop(window, [&](const Coordinates &)
+ {
+ const auto mapx_ptr = reinterpret_cast<float *>(mapx.ptr());
+ const auto mapy_ptr = reinterpret_cast<float *>(mapy.ptr());
+ const uint8_t *in_ptr = in.ptr();
+
+ uint8x8_t tmp0 = vdup_n_u8(0);
+ tmp0 = vset_lane_u8(pixel_bilinear_c1_clamp(in_ptr, in_stride, width, height, mapx_ptr[0], mapy_ptr[0]), tmp0, 0);
+ tmp0 = vset_lane_u8(pixel_bilinear_c1_clamp(in_ptr, in_stride, width, height, mapx_ptr[1], mapy_ptr[1]), tmp0, 1);
+ tmp0 = vset_lane_u8(pixel_bilinear_c1_clamp(in_ptr, in_stride, width, height, mapx_ptr[2], mapy_ptr[2]), tmp0, 2);
+ tmp0 = vset_lane_u8(pixel_bilinear_c1_clamp(in_ptr, in_stride, width, height, mapx_ptr[3], mapy_ptr[3]), tmp0, 3);
+ tmp0 = vset_lane_u8(pixel_bilinear_c1_clamp(in_ptr, in_stride, width, height, mapx_ptr[4], mapy_ptr[4]), tmp0, 4);
+ tmp0 = vset_lane_u8(pixel_bilinear_c1_clamp(in_ptr, in_stride, width, height, mapx_ptr[5], mapy_ptr[5]), tmp0, 5);
+ tmp0 = vset_lane_u8(pixel_bilinear_c1_clamp(in_ptr, in_stride, width, height, mapx_ptr[6], mapy_ptr[6]), tmp0, 6);
+ tmp0 = vset_lane_u8(pixel_bilinear_c1_clamp(in_ptr, in_stride, width, height, mapx_ptr[7], mapy_ptr[7]), tmp0, 7);
+
+ uint8x8_t tmp1 = vdup_n_u8(0);
+ tmp1 = vset_lane_u8(pixel_bilinear_c1_clamp(in_ptr, in_stride, width, height, mapx_ptr[8], mapy_ptr[8]), tmp1, 0);
+ tmp1 = vset_lane_u8(pixel_bilinear_c1_clamp(in_ptr, in_stride, width, height, mapx_ptr[9], mapy_ptr[9]), tmp1, 1);
+ tmp1 = vset_lane_u8(pixel_bilinear_c1_clamp(in_ptr, in_stride, width, height, mapx_ptr[10], mapy_ptr[10]), tmp1, 2);
+ tmp1 = vset_lane_u8(pixel_bilinear_c1_clamp(in_ptr, in_stride, width, height, mapx_ptr[11], mapy_ptr[11]), tmp1, 3);
+ tmp1 = vset_lane_u8(pixel_bilinear_c1_clamp(in_ptr, in_stride, width, height, mapx_ptr[12], mapy_ptr[12]), tmp1, 4);
+ tmp1 = vset_lane_u8(pixel_bilinear_c1_clamp(in_ptr, in_stride, width, height, mapx_ptr[13], mapy_ptr[13]), tmp1, 5);
+ tmp1 = vset_lane_u8(pixel_bilinear_c1_clamp(in_ptr, in_stride, width, height, mapx_ptr[14], mapy_ptr[14]), tmp1, 6);
+ tmp1 = vset_lane_u8(pixel_bilinear_c1_clamp(in_ptr, in_stride, width, height, mapx_ptr[15], mapy_ptr[15]), tmp1, 7);
+
+ vst1q_u8(out.ptr(), vcombine_u8(tmp0, tmp1));
+ },
+ in, out, mapx, mapy);
+}
+
+void NERemapKernel::run(const Window &window, const ThreadInfo &info)
+{
+ ARM_COMPUTE_UNUSED(info);
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
+ ARM_COMPUTE_ERROR_ON(_func == nullptr);
+
+ (this->*_func)(window);
+}
diff --git a/src/core/NEON/kernels/NERemapKernel.h b/src/core/NEON/kernels/NERemapKernel.h
new file mode 100644
index 0000000000..adc7f4bdd5
--- /dev/null
+++ b/src/core/NEON/kernels/NERemapKernel.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2016-2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_NEREMAPKERNEL_H
+#define ARM_COMPUTE_NEREMAPKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Neon kernel to perform a remap on a tensor */
+class NERemapKernel : public INEKernel
+{
+public:
+ const char *name() const override
+ {
+ return "NERemapKernel";
+ }
+ /** Default constructor */
+ NERemapKernel();
+ /** Copy construction is deleted (this class holds raw tensor pointers) */
+ NERemapKernel(const NERemapKernel &) = delete;
+ /** Copy assignment is deleted (this class holds raw tensor pointers) */
+ NERemapKernel &operator=(const NERemapKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ NERemapKernel(NERemapKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ NERemapKernel &operator=(NERemapKernel &&) = default;
+ /** Default destructor */
+ ~NERemapKernel() = default;
+
+ /** Initialize the kernel's input, X/Y coordinate maps, output and interpolation policy.
+ *
+ * @param[in] input Source tensor. Data type supported: U8.
+ * @param[in] map_x Map for X coordinates. Data type supported: F32.
+ * @param[in] map_y Map for Y coordinates. Data type supported: F32.
+ * @param[out] output Destination tensor. Data types supported: U8. All but the lowest two dimensions must be the same size as in the input tensor, i.e. remapping is only performed within the XY-plane.
+ * @param[in] policy The interpolation type to use for the remap.
+ */
+ void configure(const ITensor *input, const ITensor *map_x, const ITensor *map_y, ITensor *output, InterpolationPolicy policy);
+
+ // Inherited methods overridden:
+ void run(const Window &window, const ThreadInfo &info) override;
+ BorderSize border_size() const override;
+
+private:
+ /** Perform nearest interpolation on the given window */
+ void remap_nearest(const Window &window);
+ /** Perform bilinear interpolation on the given window */
+ void remap_bilinear(const Window &window);
+ /** Pointer to the remap member function for the interpolation type passed to configure(); invoked by run() */
+ void (NERemapKernel::*_func)(const Window &window);
+
+ const ITensor *_input; /**< Input image */
+ ITensor *_output; /**< Output image */
+ const ITensor *_map_x; /**< Input remap x coordinates */
+ const ITensor *_map_y; /**< Input remap y coordinates */
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_NEREMAPKERNEL_H */
diff --git a/src/runtime/CL/functions/CLAbsoluteDifference.cpp b/src/runtime/CL/functions/CLAbsoluteDifference.cpp
deleted file mode 100644
index ff5b0a864d..0000000000
--- a/src/runtime/CL/functions/CLAbsoluteDifference.cpp
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLAbsoluteDifference.h"
-
-#include "src/core/CL/kernels/CLAbsoluteDifferenceKernel.h"
-
-#include <utility>
-
-using namespace arm_compute;
-
-void CLAbsoluteDifference::configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output);
-}
-
-void CLAbsoluteDifference::configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output)
-{
- auto k = std::make_unique<CLAbsoluteDifferenceKernel>();
- k->configure(compile_context, input1, input2, output);
- _kernel = std::move(k);
-}
diff --git a/src/runtime/CL/functions/CLAccumulate.cpp b/src/runtime/CL/functions/CLAccumulate.cpp
deleted file mode 100644
index 44020fd816..0000000000
--- a/src/runtime/CL/functions/CLAccumulate.cpp
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLAccumulate.h"
-
-#include "src/core/CL/kernels/CLAccumulateKernel.h"
-
-#include <utility>
-
-using namespace arm_compute;
-
-void CLAccumulate::configure(const ICLTensor *input, ICLTensor *accum)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, accum);
-}
-
-void CLAccumulate::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *accum)
-{
- auto k = std::make_unique<CLAccumulateKernel>();
- k->configure(compile_context, input, accum);
- _kernel = std::move(k);
-}
-
-void CLAccumulateWeighted::configure(const ICLTensor *input, float alpha, ICLTensor *accum)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, alpha, accum);
-}
-
-void CLAccumulateWeighted::configure(const CLCompileContext &compile_context, const ICLTensor *input, float alpha, ICLTensor *accum)
-{
- auto k = std::make_unique<CLAccumulateWeightedKernel>();
- k->configure(compile_context, input, alpha, accum);
- _kernel = std::move(k);
-}
-
-void CLAccumulateSquared::configure(const ICLTensor *input, uint32_t shift, ICLTensor *accum)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, shift, accum);
-}
-
-void CLAccumulateSquared::configure(const CLCompileContext &compile_context, const ICLTensor *input, uint32_t shift, ICLTensor *accum)
-{
- auto k = std::make_unique<CLAccumulateSquaredKernel>();
- k->configure(compile_context, input, shift, accum);
- _kernel = std::move(k);
-}
diff --git a/src/runtime/CL/functions/CLBox3x3.cpp b/src/runtime/CL/functions/CLBox3x3.cpp
deleted file mode 100644
index 09e24d1bc0..0000000000
--- a/src/runtime/CL/functions/CLBox3x3.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLBox3x3.h"
-
-#include "arm_compute/core/PixelValue.h"
-#include "src/core/CL/kernels/CLBox3x3Kernel.h"
-#include "src/core/CL/kernels/CLFillBorderKernel.h"
-
-#include <utility>
-
-using namespace arm_compute;
-
-void CLBox3x3::configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output, border_mode, constant_border_value);
-}
-
-void CLBox3x3::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value)
-{
- auto k = std::make_unique<CLBox3x3Kernel>();
- k->configure(compile_context, input, output, border_mode == BorderMode::UNDEFINED);
- _kernel = std::move(k);
- _border_handler->configure(compile_context, input, BorderSize(1), border_mode, PixelValue(constant_border_value));
-}
diff --git a/src/runtime/CL/functions/CLCannyEdge.cpp b/src/runtime/CL/functions/CLCannyEdge.cpp
deleted file mode 100644
index 7e99a1bbb3..0000000000
--- a/src/runtime/CL/functions/CLCannyEdge.cpp
+++ /dev/null
@@ -1,214 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLCannyEdge.h"
-
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "arm_compute/runtime/CL/functions/CLSobel3x3.h"
-#include "arm_compute/runtime/CL/functions/CLSobel5x5.h"
-#include "arm_compute/runtime/CL/functions/CLSobel7x7.h"
-#include "src/core/CL/kernels/CLCannyEdgeKernel.h"
-#include "src/core/CL/kernels/CLFillBorderKernel.h"
-#include "src/core/CL/kernels/CLSobel5x5Kernel.h"
-#include "src/core/CL/kernels/CLSobel7x7Kernel.h"
-
-using namespace arm_compute;
-
-CLCannyEdge::CLCannyEdge(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
- : _memory_group(std::move(memory_manager)),
- _sobel(),
- _gradient(std::make_unique<CLGradientKernel>()),
- _border_mag_gradient(std::make_unique<CLFillBorderKernel>()),
- _non_max_suppr(std::make_unique<CLEdgeNonMaxSuppressionKernel>()),
- _edge_trace(std::make_unique<CLEdgeTraceKernel>()),
- _gx(),
- _gy(),
- _mag(),
- _phase(),
- _nonmax(),
- _visited(),
- _recorded(),
- _l1_list_counter(),
- _l1_stack(),
- _output(nullptr)
-{
-}
-
-CLCannyEdge::~CLCannyEdge() = default;
-
-void CLCannyEdge::configure(ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr, int32_t gradient_size, int32_t norm_type, BorderMode border_mode,
- uint8_t constant_border_value)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output, upper_thr, lower_thr, gradient_size, norm_type, border_mode, constant_border_value);
-}
-
-void CLCannyEdge::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr, int32_t gradient_size, int32_t norm_type,
- BorderMode border_mode,
- uint8_t constant_border_value)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON((1 != norm_type) && (2 != norm_type));
- ARM_COMPUTE_ERROR_ON((gradient_size != 3) && (gradient_size != 5) && (gradient_size != 7));
- ARM_COMPUTE_ERROR_ON((lower_thr < 0) || (lower_thr >= upper_thr));
-
- _output = output;
-
- const unsigned int L1_hysteresis_stack_size = 8;
- const TensorShape shape = input->info()->tensor_shape();
-
- TensorInfo gradient_info;
- TensorInfo info;
-
- // Initialize images
- if(gradient_size < 7)
- {
- gradient_info.init(shape, 1, arm_compute::DataType::S16);
- info.init(shape, 1, arm_compute::DataType::U16);
- }
- else
- {
- gradient_info.init(shape, 1, arm_compute::DataType::S32);
- info.init(shape, 1, arm_compute::DataType::U32);
- }
-
- _gx.allocator()->init(gradient_info);
- _gy.allocator()->init(gradient_info);
- _mag.allocator()->init(info);
- _nonmax.allocator()->init(info);
-
- TensorInfo info_u8(shape, 1, arm_compute::DataType::U8);
- _phase.allocator()->init(info_u8);
- _l1_list_counter.allocator()->init(info_u8);
-
- TensorInfo info_u32(shape, 1, arm_compute::DataType::U32);
- _visited.allocator()->init(info_u32);
- _recorded.allocator()->init(info_u32);
-
- TensorShape shape_l1_stack = input->info()->tensor_shape();
- shape_l1_stack.set(0, input->info()->dimension(0) * L1_hysteresis_stack_size);
- TensorInfo info_s32(shape_l1_stack, 1, arm_compute::DataType::S32);
- _l1_stack.allocator()->init(info_s32);
-
- // Manage intermediate buffers
- _memory_group.manage(&_gx);
- _memory_group.manage(&_gy);
-
- // Configure/Init sobelNxN
- if(gradient_size == 3)
- {
- auto k = std::make_unique<CLSobel3x3>();
- k->configure(compile_context, input, &_gx, &_gy, border_mode, constant_border_value);
- _sobel = std::move(k);
- }
- else if(gradient_size == 5)
- {
- auto k = std::make_unique<CLSobel5x5>();
- k->configure(compile_context, input, &_gx, &_gy, border_mode, constant_border_value);
- _sobel = std::move(k);
- }
- else if(gradient_size == 7)
- {
- auto k = std::make_unique<CLSobel7x7>();
- k->configure(compile_context, input, &_gx, &_gy, border_mode, constant_border_value);
- _sobel = std::move(k);
- }
- else
- {
- ARM_COMPUTE_ERROR_VAR("Gradient size %d not supported", gradient_size);
- }
-
- // Manage intermediate buffers
- _memory_group.manage(&_mag);
- _memory_group.manage(&_phase);
-
- // Configure gradient
- _gradient->configure(compile_context, &_gx, &_gy, &_mag, &_phase, norm_type);
-
- // Allocate intermediate buffers
- _gx.allocator()->allocate();
- _gy.allocator()->allocate();
-
- // Manage intermediate buffers
- _memory_group.manage(&_nonmax);
-
- // Configure non-maxima suppression
- _non_max_suppr->configure(compile_context, &_mag, &_phase, &_nonmax, lower_thr, border_mode == BorderMode::UNDEFINED);
-
- // Allocate intermediate buffers
- _phase.allocator()->allocate();
-
- // Fill border around magnitude image as non-maxima suppression will access
- // it. If border mode is undefined filling the border is a nop.
- _border_mag_gradient->configure(compile_context, &_mag, _non_max_suppr->border_size(), border_mode, constant_border_value);
-
- // Allocate intermediate buffers
- _mag.allocator()->allocate();
-
- // Manage intermediate buffers
- _memory_group.manage(&_visited);
- _memory_group.manage(&_recorded);
- _memory_group.manage(&_l1_stack);
- _memory_group.manage(&_l1_list_counter);
-
- // Configure edge tracing
- _edge_trace->configure(compile_context, &_nonmax, output, upper_thr, lower_thr, &_visited, &_recorded, &_l1_stack, &_l1_list_counter);
-
- // Allocate intermediate buffers
- _visited.allocator()->allocate();
- _recorded.allocator()->allocate();
- _l1_stack.allocator()->allocate();
- _l1_list_counter.allocator()->allocate();
- _nonmax.allocator()->allocate();
-}
-
-void CLCannyEdge::run()
-{
- MemoryGroupResourceScope scope_mg(_memory_group);
-
- // Run sobel
- _sobel->run();
-
- // Run phase and magnitude calculation
- CLScheduler::get().enqueue(*_gradient, false);
-
- // Fill border before non-maxima suppression. Nop for border mode undefined.
- CLScheduler::get().enqueue(*_border_mag_gradient, false);
-
- // Run non max suppresion
- _nonmax.clear(CLScheduler::get().queue());
- CLScheduler::get().enqueue(*_non_max_suppr, false);
-
- // Clear temporary structures and run edge trace
- _output->clear(CLScheduler::get().queue());
- _visited.clear(CLScheduler::get().queue());
- _recorded.clear(CLScheduler::get().queue());
- _l1_list_counter.clear(CLScheduler::get().queue());
- _l1_stack.clear(CLScheduler::get().queue());
- CLScheduler::get().enqueue(*_edge_trace, true);
-}
diff --git a/src/runtime/CL/functions/CLChannelCombine.cpp b/src/runtime/CL/functions/CLChannelCombine.cpp
deleted file mode 100644
index 543de9c653..0000000000
--- a/src/runtime/CL/functions/CLChannelCombine.cpp
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLChannelCombine.h"
-
-#include "src/core/CL/kernels/CLChannelCombineKernel.h"
-
-#include <utility>
-
-using namespace arm_compute;
-
-void CLChannelCombine::configure(const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output)
-{
- configure(CLKernelLibrary::get().get_compile_context(), plane0, plane1, plane2, plane3, output);
-}
-
-void CLChannelCombine::configure(const CLCompileContext &compile_context, const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output)
-{
- auto k = std::make_unique<CLChannelCombineKernel>();
- k->configure(compile_context, plane0, plane1, plane2, plane3, output);
- _kernel = std::move(k);
-}
-
-void CLChannelCombine::configure(const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output)
-{
- configure(CLKernelLibrary::get().get_compile_context(), plane0, plane1, plane2, output);
-}
-
-void CLChannelCombine::configure(const CLCompileContext &compile_context, const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output)
-{
- auto k = std::make_unique<CLChannelCombineKernel>();
- k->configure(compile_context, plane0, plane1, plane2, output);
- _kernel = std::move(k);
-}
diff --git a/src/runtime/CL/functions/CLChannelExtract.cpp b/src/runtime/CL/functions/CLChannelExtract.cpp
deleted file mode 100644
index 645fc051cb..0000000000
--- a/src/runtime/CL/functions/CLChannelExtract.cpp
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLChannelExtract.h"
-
-#include "src/core/CL/kernels/CLChannelExtractKernel.h"
-
-#include <utility>
-
-using namespace arm_compute;
-
-void CLChannelExtract::configure(const ICLTensor *input, Channel channel, ICLTensor *output)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, channel, output);
-}
-
-void CLChannelExtract::configure(const CLCompileContext &compile_context, const ICLTensor *input, Channel channel, ICLTensor *output)
-{
- auto k = std::make_unique<CLChannelExtractKernel>();
- k->configure(compile_context, input, channel, output);
- _kernel = std::move(k);
-}
-
-void CLChannelExtract::configure(const ICLMultiImage *input, Channel channel, ICLImage *output)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, channel, output);
-}
-
-void CLChannelExtract::configure(const CLCompileContext &compile_context, const ICLMultiImage *input, Channel channel, ICLImage *output)
-{
- auto k = std::make_unique<CLChannelExtractKernel>();
- k->configure(compile_context, input, channel, output);
- _kernel = std::move(k);
-}
diff --git a/src/runtime/CL/functions/CLColorConvert.cpp b/src/runtime/CL/functions/CLColorConvert.cpp
deleted file mode 100644
index 9aeeb65dc4..0000000000
--- a/src/runtime/CL/functions/CLColorConvert.cpp
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLColorConvert.h"
-
-#include "src/core/CL/kernels/CLColorConvertKernel.h"
-
-#include <utility>
-
-using namespace arm_compute;
-
-void CLColorConvert::configure(const ICLTensor *input, ICLTensor *output)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output);
-}
-
-void CLColorConvert::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
-{
- auto k = std::make_unique<CLColorConvertKernel>();
- k->configure(compile_context, input, output);
- _kernel = std::move(k);
-}
-
-void CLColorConvert::configure(const ICLImage *input, ICLMultiImage *output)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output);
-}
-
-void CLColorConvert::configure(const CLCompileContext &compile_context, const ICLImage *input, ICLMultiImage *output)
-{
- auto k = std::make_unique<CLColorConvertKernel>();
- k->configure(compile_context, input, output);
- _kernel = std::move(k);
-}
-
-void CLColorConvert::configure(const ICLMultiImage *input, ICLImage *output)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output);
-}
-
-void CLColorConvert::configure(const CLCompileContext &compile_context, const ICLMultiImage *input, ICLImage *output)
-{
- auto k = std::make_unique<CLColorConvertKernel>();
- k->configure(compile_context, input, output);
- _kernel = std::move(k);
-}
-
-void CLColorConvert::configure(const ICLMultiImage *input, ICLMultiImage *output)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output);
-}
-
-void CLColorConvert::configure(const CLCompileContext &compile_context, const ICLMultiImage *input, ICLMultiImage *output)
-{
- auto k = std::make_unique<CLColorConvertKernel>();
- k->configure(compile_context, input, output);
- _kernel = std::move(k);
-}
diff --git a/src/runtime/CL/functions/CLConvolution.cpp b/src/runtime/CL/functions/CLConvolution.cpp
deleted file mode 100644
index ffc7cda034..0000000000
--- a/src/runtime/CL/functions/CLConvolution.cpp
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- * Copyright (c) 2016-2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLConvolution.h"
-
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/PixelValue.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "arm_compute/runtime/ITensorAllocator.h"
-#include "src/core/CL/kernels/CLConvolutionKernel.h"
-#include "src/core/CL/kernels/CLFillBorderKernel.h"
-
-#include <utility>
-
-using namespace arm_compute;
-
-void CLConvolution3x3::configure(ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output, conv, scale, border_mode, constant_border_value);
-}
-
-void CLConvolution3x3::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode,
- uint8_t constant_border_value)
-{
- auto k = std::make_unique<CLConvolution3x3Kernel>();
- k->configure(compile_context, input, output, conv, scale, border_mode == BorderMode::UNDEFINED);
- _kernel = std::move(k);
- _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
-}
-
-template <unsigned int matrix_size>
-CLConvolutionSquare<matrix_size>::CLConvolutionSquare(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _tmp(), _is_separable(false), _kernel_hor(std::make_unique<CLSeparableConvolutionHorKernel<matrix_size>>()),
- _kernel_vert(std::make_unique<CLSeparableConvolutionVertKernel<matrix_size>>()), _kernel(std::make_unique<CLConvolutionKernel<matrix_size>>()), _border_handler(std::make_unique<CLFillBorderKernel>())
-{
-}
-
-template <unsigned int matrix_size>
-CLConvolutionSquare<matrix_size>::~CLConvolutionSquare() = default;
-
-template <unsigned int matrix_size>
-void CLConvolutionSquare<matrix_size>::configure(ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode,
- uint8_t constant_border_value)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output, conv, scale, border_mode, constant_border_value);
-}
-
-template <unsigned int matrix_size>
-void CLConvolutionSquare<matrix_size>::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode,
- uint8_t constant_border_value)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON(conv == nullptr);
- std::array<int16_t, matrix_size> conv_col{ 0 };
- std::array<int16_t, matrix_size> conv_row{ 0 };
- _is_separable = separate_matrix(conv, conv_col.data(), conv_row.data(), matrix_size);
-
- if(_is_separable)
- {
- std::pair<DataType, DataType> type_pair = data_type_for_convolution(conv_col.data(), conv_row.data(), matrix_size);
- _tmp.allocator()->init(TensorInfo(input->info()->tensor_shape(), 1, type_pair.first));
-
- // Manage intermediate buffers
- _memory_group.manage(&_tmp);
-
- if(scale == 0)
- {
- scale = calculate_matrix_scale(conv, matrix_size);
- }
-
- _kernel_hor->configure(compile_context, input, &_tmp, conv_row.data(), border_mode == BorderMode::UNDEFINED);
- _kernel_vert->configure(compile_context, &_tmp, output, conv_col.data(), scale, border_mode == BorderMode::UNDEFINED, type_pair.second);
- _border_handler->configure(compile_context, input, _kernel_hor->border_size(), border_mode, PixelValue(constant_border_value));
-
- // Allocate intermediate buffer
- _tmp.allocator()->allocate();
- }
- else
- {
- _kernel->configure(compile_context, input, output, conv, scale, border_mode == BorderMode::UNDEFINED);
- _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
- }
-}
-
-template <unsigned int matrix_size>
-void CLConvolutionSquare<matrix_size>::run()
-{
- CLScheduler::get().enqueue(*_border_handler);
-
- if(_is_separable)
- {
- MemoryGroupResourceScope scope_mg(_memory_group);
-
- CLScheduler::get().enqueue(*_kernel_hor, false);
- CLScheduler::get().enqueue(*_kernel_vert);
- }
- else
- {
- CLScheduler::get().enqueue(*_kernel);
- }
-}
-
-template class arm_compute::CLConvolutionSquare<5>;
-template class arm_compute::CLConvolutionSquare<7>;
-template class arm_compute::CLConvolutionSquare<9>;
-
-void CLConvolutionRectangle::configure(ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t rows, uint32_t cols, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output, conv, rows, cols, scale, border_mode, constant_border_value);
-}
-
-void CLConvolutionRectangle::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t rows, uint32_t cols, uint32_t scale,
- BorderMode border_mode, uint8_t constant_border_value)
-{
- border_mode = (border_mode == BorderMode::UNDEFINED) ? BorderMode::CONSTANT : border_mode;
- auto k = std::make_unique<CLConvolutionRectangleKernel>();
- k->configure(compile_context, input, output, conv, rows, cols, scale, false);
- _kernel = std::move(k);
- _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
-}
diff --git a/src/runtime/CL/functions/CLDerivative.cpp b/src/runtime/CL/functions/CLDerivative.cpp
deleted file mode 100644
index 2e3ecf7700..0000000000
--- a/src/runtime/CL/functions/CLDerivative.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLDerivative.h"
-
-#include "arm_compute/core/PixelValue.h"
-#include "src/core/CL/kernels/CLDerivativeKernel.h"
-#include "src/core/CL/kernels/CLFillBorderKernel.h"
-
-#include <utility>
-
-using namespace arm_compute;
-
-void CLDerivative::configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output_x, output_y, border_mode, constant_border_value);
-}
-
-void CLDerivative::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value)
-{
- auto k = std::make_unique<CLDerivativeKernel>();
- k->configure(compile_context, input, output_x, output_y, border_mode == BorderMode::UNDEFINED);
- _kernel = std::move(k);
- _border_handler->configure(compile_context, input, BorderSize(1), border_mode, PixelValue(constant_border_value));
-}
diff --git a/src/runtime/CL/functions/CLDilate.cpp b/src/runtime/CL/functions/CLDilate.cpp
deleted file mode 100644
index 92c5cc7ab1..0000000000
--- a/src/runtime/CL/functions/CLDilate.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLDilate.h"
-
-#include "arm_compute/core/PixelValue.h"
-#include "src/core/CL/kernels/CLDilateKernel.h"
-#include "src/core/CL/kernels/CLFillBorderKernel.h"
-
-#include <utility>
-
-using namespace arm_compute;
-
-void CLDilate::configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output, border_mode, constant_border_value);
-}
-
-void CLDilate::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value)
-{
- auto k = std::make_unique<CLDilateKernel>();
- k->configure(compile_context, input, output, border_mode == BorderMode::UNDEFINED);
- _kernel = std::move(k);
- _border_handler->configure(compile_context, input, BorderSize(1), border_mode, PixelValue(constant_border_value));
-}
diff --git a/src/runtime/CL/functions/CLEqualizeHistogram.cpp b/src/runtime/CL/functions/CLEqualizeHistogram.cpp
deleted file mode 100644
index 11607cf71d..0000000000
--- a/src/runtime/CL/functions/CLEqualizeHistogram.cpp
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLEqualizeHistogram.h"
-
-#include "arm_compute/core/CL/ICLDistribution1D.h"
-#include "arm_compute/core/CL/ICLLut.h"
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "src/core/CL/kernels/CLHistogramKernel.h"
-#include "src/core/CL/kernels/CLTableLookupKernel.h"
-
-#include <algorithm>
-#include <cmath>
-#include <cstddef>
-#include <numeric>
-
-using namespace arm_compute;
-
-namespace
-{
-void calculate_cum_dist_and_lut(CLDistribution1D &dist, CLDistribution1D &cum_dist, CLLut &lut)
-{
- dist.map(true);
- cum_dist.map(true);
- lut.map(true);
-
- const uint32_t *dist_ptr = dist.buffer();
- uint32_t *cum_dist_ptr = cum_dist.buffer();
- uint8_t *lut_ptr = lut.buffer();
-
- ARM_COMPUTE_ERROR_ON(dist_ptr == nullptr);
- ARM_COMPUTE_ERROR_ON(cum_dist_ptr == nullptr);
- ARM_COMPUTE_ERROR_ON(lut_ptr == nullptr);
-
- // Calculate cumulative distribution
- std::partial_sum(dist_ptr, dist_ptr + 256, cum_dist_ptr);
-
- // Get the number of pixels that have the lowest value in the input image
- const uint32_t num_lowest_pixels = *std::find_if(dist_ptr, dist_ptr + 256, [](const uint32_t &v)
- {
- return v > 0;
- });
- const size_t image_size = cum_dist_ptr[255];
-
- if(image_size == num_lowest_pixels)
- {
- std::iota(lut_ptr, lut_ptr + 256, 0);
- }
- else
- {
- const float diff = image_size - num_lowest_pixels;
-
- for(size_t i = 0; i < 256; ++i)
- {
- lut_ptr[i] = lround((cum_dist_ptr[i] - num_lowest_pixels) / diff * 255.f);
- }
- }
-
- dist.unmap();
- cum_dist.unmap();
- lut.unmap();
-}
-} // namespace
-
-CLEqualizeHistogram::CLEqualizeHistogram()
- : _histogram_kernel(std::make_unique<CLHistogramKernel>()),
- _border_histogram_kernel(std::make_unique<CLHistogramBorderKernel>()),
- _map_histogram_kernel(std::make_unique<CLTableLookupKernel>()),
- _hist(nr_bins, 0, max_range),
- _cum_dist(nr_bins, 0, max_range),
- _cd_lut(nr_bins, DataType::U8)
-{
-}
-
-CLEqualizeHistogram::~CLEqualizeHistogram() = default;
-
-void CLEqualizeHistogram::configure(const ICLImage *input, ICLImage *output)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output);
-}
-
-void CLEqualizeHistogram::configure(const CLCompileContext &compile_context, const ICLImage *input, ICLImage *output)
-{
- _histogram_kernel->configure(compile_context, input, &_hist);
- _border_histogram_kernel->configure(compile_context, input, &_hist);
- _map_histogram_kernel->configure(compile_context, input, &_cd_lut, output);
-}
-
-void CLEqualizeHistogram::run()
-{
- // Calculate histogram of input.
- CLScheduler::get().enqueue(*_histogram_kernel, false);
-
- // Calculate remaining pixels when image is not multiple of the elements of histogram kernel
- CLScheduler::get().enqueue(*_border_histogram_kernel, false);
-
- // Calculate cumulative distribution of histogram and create LUT.
- calculate_cum_dist_and_lut(_hist, _cum_dist, _cd_lut);
-
- // Map input to output using created LUT.
- CLScheduler::get().enqueue(*_map_histogram_kernel);
-}
diff --git a/src/runtime/CL/functions/CLErode.cpp b/src/runtime/CL/functions/CLErode.cpp
deleted file mode 100644
index 29551fc6bd..0000000000
--- a/src/runtime/CL/functions/CLErode.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLErode.h"
-
-#include "arm_compute/core/PixelValue.h"
-#include "src/core/CL/kernels/CLErodeKernel.h"
-#include "src/core/CL/kernels/CLFillBorderKernel.h"
-
-#include <utility>
-
-using namespace arm_compute;
-
-void CLErode::configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output, border_mode, constant_border_value);
-}
-
-void CLErode::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value)
-{
- auto k = std::make_unique<CLErodeKernel>();
- k->configure(compile_context, input, output, border_mode == BorderMode::UNDEFINED);
- _kernel = std::move(k);
- _border_handler->configure(compile_context, input, BorderSize(1), border_mode, PixelValue(constant_border_value));
-}
diff --git a/src/runtime/CL/functions/CLFastCorners.cpp b/src/runtime/CL/functions/CLFastCorners.cpp
deleted file mode 100644
index a3a62d6d5e..0000000000
--- a/src/runtime/CL/functions/CLFastCorners.cpp
+++ /dev/null
@@ -1,141 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLFastCorners.h"
-
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "arm_compute/runtime/ITensorAllocator.h"
-#include "src/core/CL/kernels/CLFastCornersKernel.h"
-#include "src/core/CL/kernels/CLFillBorderKernel.h"
-
-#include <algorithm>
-#include <cstring>
-
-using namespace arm_compute;
-
-CLFastCorners::CLFastCorners(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)),
- _fast_corners_kernel(std::make_unique<CLFastCornersKernel>()),
- _suppr_func(),
- _copy_array_kernel(std::make_unique<CLCopyToArrayKernel>()),
- _output(),
- _suppr(),
- _win(),
- _non_max(false),
- _num_corners(nullptr),
- _num_buffer(),
- _corners(nullptr),
- _constant_border_value(0)
-{
-}
-
-CLFastCorners::~CLFastCorners() = default;
-
-void CLFastCorners::configure(const ICLImage *input, float threshold, bool nonmax_suppression, ICLKeyPointArray *corners,
- unsigned int *num_corners, BorderMode border_mode, uint8_t constant_border_value)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, threshold, nonmax_suppression, corners, num_corners, border_mode, constant_border_value);
-}
-
-void CLFastCorners::configure(const CLCompileContext &compile_context, const ICLImage *input, float threshold, bool nonmax_suppression, ICLKeyPointArray *corners,
- unsigned int *num_corners, BorderMode border_mode, uint8_t constant_border_value)
-{
- ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input);
- ARM_COMPUTE_ERROR_ON(BorderMode::UNDEFINED != border_mode);
- ARM_COMPUTE_ERROR_ON(nullptr == corners);
- ARM_COMPUTE_ERROR_ON(threshold < 1 && threshold > 255);
-
- TensorInfo tensor_info(input->info()->tensor_shape(), 1, DataType::U8);
- _output.allocator()->init(tensor_info);
-
- _non_max = nonmax_suppression;
- _num_corners = num_corners;
- _corners = corners;
- _num_buffer = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(unsigned int));
- _constant_border_value = constant_border_value;
-
- const bool update_number = (nullptr != _num_corners);
-
- _memory_group.manage(&_output);
- _fast_corners_kernel->configure(compile_context, input, &_output, threshold, nonmax_suppression, border_mode);
-
- if(!_non_max)
- {
- _copy_array_kernel->configure(compile_context, &_output, update_number, _corners, &_num_buffer);
- }
- else
- {
- _suppr.allocator()->init(tensor_info);
- _memory_group.manage(&_suppr);
-
- _suppr_func.configure(compile_context, &_output, &_suppr, border_mode);
- _copy_array_kernel->configure(compile_context, &_suppr, update_number, _corners, &_num_buffer);
-
- _suppr.allocator()->allocate();
- }
-
- // Allocate intermediate tensors
- _output.allocator()->allocate();
-}
-
-void CLFastCorners::run()
-{
- cl::CommandQueue q = CLScheduler::get().queue();
-
- MemoryGroupResourceScope scope_mg(_memory_group);
-
- if(_non_max)
- {
- ARM_COMPUTE_ERROR_ON_MSG(_output.cl_buffer().get() == nullptr, "Unconfigured function");
- const auto out_buffer = static_cast<unsigned char *>(q.enqueueMapBuffer(_output.cl_buffer(), CL_TRUE, CL_MAP_WRITE, 0, _output.info()->total_size()));
- memset(out_buffer, 0, _output.info()->total_size());
- q.enqueueUnmapMemObject(_output.cl_buffer(), out_buffer);
- }
-
- CLScheduler::get().enqueue(*_fast_corners_kernel, false);
-
- if(_non_max)
- {
- _suppr_func.run();
- }
-
- CLScheduler::get().enqueue(*_copy_array_kernel, false);
-
- unsigned int get_num_corners = 0;
- q.enqueueReadBuffer(_num_buffer, CL_TRUE, 0, sizeof(unsigned int), &get_num_corners);
-
- size_t corner_size = std::min(static_cast<size_t>(get_num_corners), _corners->max_num_values());
-
- _corners->resize(corner_size);
-
- if(_num_corners != nullptr)
- {
- *_num_corners = get_num_corners;
- }
-
- q.flush();
-}
diff --git a/src/runtime/CL/functions/CLGaussian3x3.cpp b/src/runtime/CL/functions/CLGaussian3x3.cpp
deleted file mode 100644
index 8eeade2f47..0000000000
--- a/src/runtime/CL/functions/CLGaussian3x3.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLGaussian3x3.h"
-
-#include "arm_compute/core/PixelValue.h"
-#include "src/core/CL/kernels/CLFillBorderKernel.h"
-#include "src/core/CL/kernels/CLGaussian3x3Kernel.h"
-
-#include <utility>
-
-using namespace arm_compute;
-
-void CLGaussian3x3::configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output, border_mode, constant_border_value);
-}
-
-void CLGaussian3x3::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value)
-{
- auto k = std::make_unique<CLGaussian3x3Kernel>();
- k->configure(compile_context, input, output, border_mode == BorderMode::UNDEFINED);
- _kernel = std::move(k);
- _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
-}
diff --git a/src/runtime/CL/functions/CLGaussian5x5.cpp b/src/runtime/CL/functions/CLGaussian5x5.cpp
deleted file mode 100644
index ee72fcbe11..0000000000
--- a/src/runtime/CL/functions/CLGaussian5x5.cpp
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLGaussian5x5.h"
-
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/PixelValue.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "arm_compute/runtime/ITensorAllocator.h"
-#include "src/core/CL/kernels/CLFillBorderKernel.h"
-#include "src/core/CL/kernels/CLGaussian5x5Kernel.h"
-
-#include <utility>
-
-using namespace arm_compute;
-
-CLGaussian5x5::CLGaussian5x5(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)),
- _kernel_hor(std::make_unique<CLGaussian5x5HorKernel>()),
- _kernel_vert(std::make_unique<CLGaussian5x5VertKernel>()),
- _border_handler(std::make_unique<CLFillBorderKernel>()),
- _tmp()
-{
-}
-
-CLGaussian5x5::~CLGaussian5x5() = default;
-
-void CLGaussian5x5::configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output, border_mode, constant_border_value);
-}
-
-void CLGaussian5x5::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
-
- _tmp.allocator()->init(TensorInfo(input->info()->tensor_shape(), 1, DataType::U16));
-
- // Manage intermediate buffers
- _memory_group.manage(&_tmp);
-
- // Configure kernels
- _kernel_hor->configure(compile_context, input, &_tmp, border_mode == BorderMode::UNDEFINED);
- _kernel_vert->configure(compile_context, &_tmp, output, border_mode == BorderMode::UNDEFINED);
- _border_handler->configure(compile_context, input, _kernel_hor->border_size(), border_mode, PixelValue(constant_border_value));
-
- // Allocate intermediate buffers
- _tmp.allocator()->allocate();
-}
-
-void CLGaussian5x5::run()
-{
- CLScheduler::get().enqueue(*_border_handler, false);
-
- MemoryGroupResourceScope scope_mg(_memory_group);
-
- CLScheduler::get().enqueue(*_kernel_hor, false);
- CLScheduler::get().enqueue(*_kernel_vert);
-}
diff --git a/src/runtime/CL/functions/CLGaussianPyramid.cpp b/src/runtime/CL/functions/CLGaussianPyramid.cpp
deleted file mode 100644
index 9fe35f6f0e..0000000000
--- a/src/runtime/CL/functions/CLGaussianPyramid.cpp
+++ /dev/null
@@ -1,214 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLGaussianPyramid.h"
-
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/PixelValue.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
-#include "arm_compute/runtime/CL/CLPyramid.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "arm_compute/runtime/CL/functions/CLGaussian5x5.h"
-#include "src/core/CL/kernels/CLFillBorderKernel.h"
-#include "src/core/CL/kernels/CLGaussian5x5Kernel.h"
-#include "src/core/CL/kernels/CLGaussianPyramidKernel.h"
-#include "src/core/CL/kernels/CLScaleKernel.h"
-
-#include <cstddef>
-
-using namespace arm_compute;
-
-CLGaussianPyramid::CLGaussianPyramid()
- : _input(nullptr), _pyramid(nullptr), _tmp()
-{
-}
-
-CLGaussianPyramid::~CLGaussianPyramid() = default;
-
-CLGaussianPyramidHalf::CLGaussianPyramidHalf() // NOLINT
- : _horizontal_border_handler(),
- _vertical_border_handler(),
- _horizontal_reduction(),
- _vertical_reduction()
-{
-}
-
-CLGaussianPyramidHalf::~CLGaussianPyramidHalf() = default;
-
-void CLGaussianPyramidHalf::configure(ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, pyramid, border_mode, constant_border_value);
-}
-
-void CLGaussianPyramidHalf::configure(const CLCompileContext &compile_context, ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON(pyramid == nullptr);
- ARM_COMPUTE_ERROR_ON(input->info()->num_dimensions() != pyramid->get_pyramid_level(0)->info()->num_dimensions());
- ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != pyramid->info()->width());
- ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) != pyramid->info()->height());
- ARM_COMPUTE_ERROR_ON(SCALE_PYRAMID_HALF != pyramid->info()->scale());
-
- // Constant value to use for vertical fill border when the border mode is CONSTANT
- const uint16_t pixel_value_u16 = static_cast<uint16_t>(constant_border_value) * 2 + static_cast<uint16_t>(constant_border_value) * 8 + static_cast<uint16_t>(constant_border_value) * 6;
-
- /* Get number of pyramid levels */
- const size_t num_levels = pyramid->info()->num_levels();
-
- _input = input;
- _pyramid = pyramid;
-
- if(num_levels > 1)
- {
- _horizontal_border_handler.reserve(num_levels - 1);
- _vertical_border_handler.reserve(num_levels - 1);
- _horizontal_reduction.reserve(num_levels - 1);
- _vertical_reduction.reserve(num_levels - 1);
-
- // Apply half scale to the X dimension of the tensor shape
- TensorShape tensor_shape = pyramid->info()->tensor_shape();
- tensor_shape.set(0, (pyramid->info()->width() + 1) * SCALE_PYRAMID_HALF);
-
- PyramidInfo pyramid_info(num_levels - 1, SCALE_PYRAMID_HALF, tensor_shape, Format::U16);
- _tmp.init(pyramid_info);
-
- for(size_t i = 0; i < num_levels - 1; ++i)
- {
- /* Configure horizontal kernel */
- _horizontal_reduction.emplace_back(std::make_unique<CLGaussianPyramidHorKernel>());
- _horizontal_reduction.back()->configure(compile_context, _pyramid->get_pyramid_level(i), _tmp.get_pyramid_level(i));
-
- /* Configure vertical kernel */
- _vertical_reduction.emplace_back(std::make_unique<CLGaussianPyramidVertKernel>());
- _vertical_reduction.back()->configure(compile_context, _tmp.get_pyramid_level(i), _pyramid->get_pyramid_level(i + 1));
-
- /* Configure border */
- _horizontal_border_handler.emplace_back(std::make_unique<CLFillBorderKernel>());
- _horizontal_border_handler.back()->configure(compile_context, _pyramid->get_pyramid_level(i), _horizontal_reduction.back()->border_size(), border_mode, PixelValue(constant_border_value));
-
- /* Configure border */
- _vertical_border_handler.emplace_back(std::make_unique<CLFillBorderKernel>());
- _vertical_border_handler.back()->configure(compile_context, _tmp.get_pyramid_level(i), _vertical_reduction.back()->border_size(), border_mode, PixelValue(pixel_value_u16));
- }
- _tmp.allocate();
- }
-}
-
-void CLGaussianPyramidHalf::run()
-{
- ARM_COMPUTE_ERROR_ON_MSG(_pyramid == nullptr, "Unconfigured function");
-
- /* Get number of pyramid levels */
- const size_t num_levels = _pyramid->info()->num_levels();
-
- /* The first level of the pyramid has the input image */
- _pyramid->get_pyramid_level(0)->map(CLScheduler::get().queue(), true /* blocking */);
- _input->map(CLScheduler::get().queue(), true /* blocking */);
- _pyramid->get_pyramid_level(0)->copy_from(*_input);
-
- _input->unmap(CLScheduler::get().queue());
- _pyramid->get_pyramid_level(0)->unmap(CLScheduler::get().queue());
-
- for(unsigned int i = 0; i < num_levels - 1; ++i)
- {
- CLScheduler::get().enqueue(*_horizontal_border_handler[i], false);
- CLScheduler::get().enqueue(*_horizontal_reduction[i], false);
- CLScheduler::get().enqueue(*_vertical_border_handler[i], false);
- CLScheduler::get().enqueue(*_vertical_reduction[i], false);
- }
-}
-
-CLGaussianPyramidOrb::CLGaussianPyramidOrb() // NOLINT
- : _gauss5x5(),
- _scale_nearest()
-{
-}
-
-void CLGaussianPyramidOrb::configure(ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, pyramid, border_mode, constant_border_value);
-}
-
-void CLGaussianPyramidOrb::configure(const CLCompileContext &compile_context, ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON(nullptr == pyramid);
- ARM_COMPUTE_ERROR_ON(input->info()->num_dimensions() != pyramid->get_pyramid_level(0)->info()->num_dimensions());
- ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != pyramid->info()->width());
- ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) != pyramid->info()->height());
- ARM_COMPUTE_ERROR_ON(SCALE_PYRAMID_ORB != pyramid->info()->scale());
-
- /* Get number of pyramid levels */
- const size_t num_levels = pyramid->info()->num_levels();
-
- _input = input;
- _pyramid = pyramid;
-
- if(num_levels > 1)
- {
- _gauss5x5.resize(num_levels - 1);
- _scale_nearest.reserve(num_levels - 1);
-
- PyramidInfo pyramid_info(num_levels - 1, SCALE_PYRAMID_ORB, pyramid->info()->tensor_shape(), Format::U8);
-
- _tmp.init(pyramid_info);
-
- for(size_t i = 0; i < num_levels - 1; ++i)
- {
- /* Configure gaussian 5x5 */
- _gauss5x5[i].configure(compile_context, _pyramid->get_pyramid_level(i), _tmp.get_pyramid_level(i), border_mode, constant_border_value);
-
- /* Configure scale image kernel */
- _scale_nearest.emplace_back(std::make_unique<CLScaleKernel>());
- _scale_nearest.back()->configure(compile_context, _tmp.get_pyramid_level(i), _pyramid->get_pyramid_level(i + 1), ScaleKernelInfo{ InterpolationPolicy::NEAREST_NEIGHBOR, border_mode, PixelValue(), SamplingPolicy::CENTER });
- }
-
- _tmp.allocate();
- }
-}
-
-void CLGaussianPyramidOrb::run()
-{
- ARM_COMPUTE_ERROR_ON_MSG(_pyramid == nullptr, "Unconfigured function");
-
- /* Get number of pyramid levels */
- const size_t num_levels = _pyramid->info()->num_levels();
-
- /* The first level of the pyramid has the input image */
- _pyramid->get_pyramid_level(0)->map(CLScheduler::get().queue(), true /* blocking */);
- _input->map(CLScheduler::get().queue(), true /* blocking */);
- _pyramid->get_pyramid_level(0)->copy_from(*_input);
- _input->unmap(CLScheduler::get().queue());
- _pyramid->get_pyramid_level(0)->unmap(CLScheduler::get().queue());
-
- for(unsigned int i = 0; i < num_levels - 1; ++i)
- {
- _gauss5x5[i].run();
- CLScheduler::get().enqueue(*_scale_nearest[i]);
- }
-}
diff --git a/src/runtime/CL/functions/CLHOGDescriptor.cpp b/src/runtime/CL/functions/CLHOGDescriptor.cpp
deleted file mode 100644
index 8d9ea17d66..0000000000
--- a/src/runtime/CL/functions/CLHOGDescriptor.cpp
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLHOGDescriptor.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/HOGInfo.h"
-#include "arm_compute/core/Size2D.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "src/core/CL/kernels/CLFillBorderKernel.h"
-#include "src/core/CL/kernels/CLHOGDescriptorKernel.h"
-#include "src/core/CL/kernels/CLMagnitudePhaseKernel.h"
-
-using namespace arm_compute;
-
-CLHOGDescriptor::CLHOGDescriptor(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)),
- _gradient(),
- _orient_bin(std::make_unique<CLHOGOrientationBinningKernel>()),
- _block_norm(std::make_unique<CLHOGBlockNormalizationKernel>()),
- _mag(),
- _phase(),
- _hog_space()
-{
-}
-
-CLHOGDescriptor::~CLHOGDescriptor() = default;
-
-void CLHOGDescriptor::configure(ICLTensor *input, ICLTensor *output, const IHOG *hog, BorderMode border_mode, uint8_t constant_border_value)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output, hog, border_mode, constant_border_value);
-}
-
-void CLHOGDescriptor::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const IHOG *hog, BorderMode border_mode, uint8_t constant_border_value)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON(nullptr == output);
- ARM_COMPUTE_ERROR_ON(nullptr == hog);
-
- const HOGInfo *hog_info = hog->info();
- const size_t width = input->info()->dimension(Window::DimX);
- const size_t height = input->info()->dimension(Window::DimY);
- const size_t num_bins = hog_info->num_bins();
-
- Size2D cell_size = hog_info->cell_size();
-
- // Calculate number of cells along the x and y directions for the hog_space
- const size_t num_cells_x = width / cell_size.width;
- const size_t num_cells_y = height / cell_size.height;
-
- // TensorShape of the input image
- const TensorShape &shape_img = input->info()->tensor_shape();
-
- // TensorShape of the hog space
- TensorShape shape_hog_space = input->info()->tensor_shape();
- shape_hog_space.set(Window::DimX, num_cells_x);
- shape_hog_space.set(Window::DimY, num_cells_y);
-
- // Intitialize tensors for magnitude, phase and hog space
- TensorInfo info_mag(shape_img, Format::S16);
- _mag.allocator()->init(info_mag);
-
- TensorInfo info_phase(shape_img, Format::U8);
- _phase.allocator()->init(info_phase);
-
- TensorInfo info_space(shape_hog_space, num_bins, DataType::F32);
- _hog_space.allocator()->init(info_space);
-
- // Manage intermediate buffers
- _memory_group.manage(&_mag);
- _memory_group.manage(&_phase);
-
- // Initialise gradient kernel
- _gradient.configure(compile_context, input, &_mag, &_phase, hog_info->phase_type(), border_mode, constant_border_value);
-
- // Manage intermediate buffers
- _memory_group.manage(&_hog_space);
-
- // Initialise orientation binning kernel
- _orient_bin->configure(compile_context, &_mag, &_phase, &_hog_space, hog->info());
-
- // Initialize HOG norm kernel
- _block_norm->configure(compile_context, &_hog_space, output, hog->info());
-
- // Allocate intermediate tensors
- _mag.allocator()->allocate();
- _phase.allocator()->allocate();
- _hog_space.allocator()->allocate();
-}
-
-void CLHOGDescriptor::run()
-{
- MemoryGroupResourceScope scope_mg(_memory_group);
-
- // Run gradient
- _gradient.run();
-
- // Run orientation binning
- CLScheduler::get().enqueue(*_orient_bin, false);
-
- // Run block normalization
- CLScheduler::get().enqueue(*_block_norm);
-} \ No newline at end of file
diff --git a/src/runtime/CL/functions/CLHOGDetector.cpp b/src/runtime/CL/functions/CLHOGDetector.cpp
deleted file mode 100644
index 365021c723..0000000000
--- a/src/runtime/CL/functions/CLHOGDetector.cpp
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLHOGDetector.h"
-
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "src/core/CL/kernels/CLHOGDetectorKernel.h"
-
-#include <algorithm>
-
-using namespace arm_compute;
-
-CLHOGDetector::CLHOGDetector()
- : _hog_detector_kernel(std::make_unique<CLHOGDetectorKernel>()), _detection_windows(nullptr), _num_detection_windows()
-{
-}
-
-CLHOGDetector::~CLHOGDetector() = default;
-
-void CLHOGDetector::configure(const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, const Size2D &detection_window_stride, float threshold, size_t idx_class)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, hog, detection_windows, detection_window_stride, threshold, idx_class);
-}
-
-void CLHOGDetector::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, const Size2D &detection_window_stride,
- float threshold, size_t idx_class)
-{
- _detection_windows = detection_windows;
-
- // Allocate buffer for storing the number of detected objects
- _num_detection_windows = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(unsigned int));
-
- // Configure HOGDetectorKernel
- _hog_detector_kernel->configure(compile_context, input, hog, detection_windows, &_num_detection_windows, detection_window_stride, threshold, idx_class);
-}
-
-void CLHOGDetector::run()
-{
- cl::CommandQueue q = CLScheduler::get().queue();
-
- // Reset number of detections
- const unsigned int init_num_detection_windows = _detection_windows->num_values();
- q.enqueueWriteBuffer(_num_detection_windows, CL_FALSE, 0, sizeof(unsigned int), &init_num_detection_windows);
-
- // Run CLHOGDetectorKernel
- CLScheduler::get().enqueue(*_hog_detector_kernel);
-
- // Read number of detections
- unsigned int num_detection_windows = 0;
- q.enqueueReadBuffer(_num_detection_windows, CL_TRUE, 0, sizeof(unsigned int), &num_detection_windows);
-
- // Update the number of values stored in _detection_windows
- _detection_windows->resize(static_cast<size_t>(num_detection_windows));
-
- q.flush();
-} \ No newline at end of file
diff --git a/src/runtime/CL/functions/CLHOGGradient.cpp b/src/runtime/CL/functions/CLHOGGradient.cpp
deleted file mode 100644
index f3aa527417..0000000000
--- a/src/runtime/CL/functions/CLHOGGradient.cpp
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLHOGGradient.h"
-
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "src/core/CL/kernels/CLFillBorderKernel.h"
-#include "src/core/CL/kernels/CLMagnitudePhaseKernel.h"
-
-using namespace arm_compute;
-
-CLHOGGradient::CLHOGGradient(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)),
- _derivative(),
- _mag_phase(std::make_unique<CLMagnitudePhaseKernel>()),
- _gx(),
- _gy()
-{
-}
-
-void CLHOGGradient::configure(ICLTensor *input, ICLTensor *output_magnitude, ICLTensor *output_phase, PhaseType phase_type, BorderMode border_mode, uint8_t constant_border_value)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output_magnitude, output_phase, phase_type, border_mode, constant_border_value);
-}
-
-void CLHOGGradient::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output_magnitude, ICLTensor *output_phase, PhaseType phase_type, BorderMode border_mode,
- uint8_t constant_border_value)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_magnitude, 1, DataType::S16);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_phase, 1, DataType::U8);
-
- const TensorShape &shape_img = input->info()->tensor_shape();
-
- // Allocate image memory
- TensorInfo info(shape_img, Format::S16);
- _gx.allocator()->init(info);
- _gy.allocator()->init(info);
-
- // Manage intermediate buffers
- _memory_group.manage(&_gx);
- _memory_group.manage(&_gy);
-
- // Initialise derivate kernel
- _derivative.configure(compile_context, input, &_gx, &_gy, border_mode, constant_border_value);
-
- // Initialise magnitude/phase kernel
- if(PhaseType::UNSIGNED == phase_type)
- {
- _mag_phase->configure(compile_context, &_gx, &_gy, output_magnitude, output_phase, MagnitudeType::L2NORM, PhaseType::UNSIGNED);
- }
- else
- {
- _mag_phase->configure(compile_context, &_gx, &_gy, output_magnitude, output_phase, MagnitudeType::L2NORM, PhaseType::SIGNED);
- }
-
- // Allocate intermediate tensors
- _gx.allocator()->allocate();
- _gy.allocator()->allocate();
-}
-
-void CLHOGGradient::run()
-{
- MemoryGroupResourceScope scope_mg(_memory_group);
-
- // Run derivative
- _derivative.run();
-
- // Run magnitude/phase kernel
- CLScheduler::get().enqueue(*_mag_phase);
-} \ No newline at end of file
diff --git a/src/runtime/CL/functions/CLHOGMultiDetection.cpp b/src/runtime/CL/functions/CLHOGMultiDetection.cpp
deleted file mode 100644
index 2464e6cf9f..0000000000
--- a/src/runtime/CL/functions/CLHOGMultiDetection.cpp
+++ /dev/null
@@ -1,282 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLHOGMultiDetection.h"
-
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/runtime/CL/CLArray.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/Scheduler.h"
-#include "src/core/CL/kernels/CLFillBorderKernel.h"
-#include "src/core/CL/kernels/CLHOGDescriptorKernel.h"
-#include "src/core/CL/kernels/CLHOGDetectorKernel.h"
-#include "src/core/CL/kernels/CLMagnitudePhaseKernel.h"
-
-using namespace arm_compute;
-
-CLHOGMultiDetection::CLHOGMultiDetection(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
- : _memory_group(std::move(memory_manager)),
- _gradient_kernel(),
- _orient_bin_kernel(),
- _block_norm_kernel(),
- _hog_detect_kernel(),
- _non_maxima_kernel(),
- _hog_space(),
- _hog_norm_space(),
- _detection_windows(),
- _mag(),
- _phase(),
- _non_maxima_suppression(false),
- _num_orient_bin_kernel(0),
- _num_block_norm_kernel(0),
- _num_hog_detect_kernel(0)
-{
-}
-
-CLHOGMultiDetection::~CLHOGMultiDetection() = default;
-
-void CLHOGMultiDetection::configure(ICLTensor *input, const ICLMultiHOG *multi_hog, ICLDetectionWindowArray *detection_windows, ICLSize2DArray *detection_window_strides, BorderMode border_mode,
- uint8_t constant_border_value, float threshold, bool non_maxima_suppression, float min_distance)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, multi_hog, detection_windows, detection_window_strides, border_mode, constant_border_value, threshold, non_maxima_suppression,
- min_distance);
-}
-
-void CLHOGMultiDetection::configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLMultiHOG *multi_hog, ICLDetectionWindowArray *detection_windows,
- ICLSize2DArray *detection_window_strides, BorderMode border_mode,
- uint8_t constant_border_value, float threshold, bool non_maxima_suppression, float min_distance)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON_INVALID_MULTI_HOG(multi_hog);
- ARM_COMPUTE_ERROR_ON(nullptr == detection_windows);
- ARM_COMPUTE_ERROR_ON(detection_window_strides->num_values() != multi_hog->num_models());
-
- const size_t width = input->info()->dimension(Window::DimX);
- const size_t height = input->info()->dimension(Window::DimY);
- const TensorShape &shape_img = input->info()->tensor_shape();
- const size_t num_models = multi_hog->num_models();
- PhaseType phase_type = multi_hog->model(0)->info()->phase_type();
-
- size_t prev_num_bins = multi_hog->model(0)->info()->num_bins();
- Size2D prev_cell_size = multi_hog->model(0)->info()->cell_size();
- Size2D prev_block_size = multi_hog->model(0)->info()->block_size();
- Size2D prev_block_stride = multi_hog->model(0)->info()->block_stride();
-
- /* Check if CLHOGOrientationBinningKernel and CLHOGBlockNormalizationKernel kernels can be skipped for a specific HOG data-object
- *
- * 1) CLHOGOrientationBinningKernel and CLHOGBlockNormalizationKernel are skipped if the cell size and the number of bins don't change.
- * Since "multi_hog" is sorted,it is enough to check the HOG descriptors at level "ith" and level "(i-1)th
- * 2) CLHOGBlockNormalizationKernel is skipped if the cell size, the number of bins and block size do not change.
- * Since "multi_hog" is sorted,it is enough to check the HOG descriptors at level "ith" and level "(i-1)th
- *
- * @note Since the orientation binning and block normalization kernels can be skipped, we need to keep track of the input to process for each kernel
- * with "input_orient_bin", "input_hog_detect" and "input_block_norm"
- */
- std::vector<size_t> input_orient_bin;
- std::vector<size_t> input_hog_detect;
- std::vector<std::pair<size_t, size_t>> input_block_norm;
-
- input_orient_bin.push_back(0);
- input_hog_detect.push_back(0);
- input_block_norm.emplace_back(0, 0);
-
- for(size_t i = 1; i < num_models; ++i)
- {
- size_t cur_num_bins = multi_hog->model(i)->info()->num_bins();
- Size2D cur_cell_size = multi_hog->model(i)->info()->cell_size();
- Size2D cur_block_size = multi_hog->model(i)->info()->block_size();
- Size2D cur_block_stride = multi_hog->model(i)->info()->block_stride();
-
- if((cur_num_bins != prev_num_bins) || (cur_cell_size.width != prev_cell_size.width) || (cur_cell_size.height != prev_cell_size.height))
- {
- prev_num_bins = cur_num_bins;
- prev_cell_size = cur_cell_size;
- prev_block_size = cur_block_size;
- prev_block_stride = cur_block_stride;
-
- // Compute orientation binning and block normalization kernels. Update input to process
- input_orient_bin.push_back(i);
- input_block_norm.emplace_back(i, input_orient_bin.size() - 1);
- }
- else if((cur_block_size.width != prev_block_size.width) || (cur_block_size.height != prev_block_size.height) || (cur_block_stride.width != prev_block_stride.width)
- || (cur_block_stride.height != prev_block_stride.height))
- {
- prev_block_size = cur_block_size;
- prev_block_stride = cur_block_stride;
-
- // Compute block normalization kernel. Update input to process
- input_block_norm.emplace_back(i, input_orient_bin.size() - 1);
- }
-
- // Update input to process for hog detector kernel
- input_hog_detect.push_back(input_block_norm.size() - 1);
- }
-
- _detection_windows = detection_windows;
- _non_maxima_suppression = non_maxima_suppression;
- _num_orient_bin_kernel = input_orient_bin.size(); // Number of CLHOGOrientationBinningKernel kernels to compute
- _num_block_norm_kernel = input_block_norm.size(); // Number of CLHOGBlockNormalizationKernel kernels to compute
- _num_hog_detect_kernel = input_hog_detect.size(); // Number of CLHOGDetector functions to compute
-
- _orient_bin_kernel.reserve(_num_orient_bin_kernel);
- _block_norm_kernel.reserve(_num_block_norm_kernel);
- _hog_detect_kernel.resize(_num_hog_detect_kernel);
- _hog_space.resize(_num_orient_bin_kernel);
- _hog_norm_space.resize(_num_block_norm_kernel);
-
- // Allocate tensors for magnitude and phase
- TensorInfo info_mag(shape_img, Format::S16);
- _mag.allocator()->init(info_mag);
-
- TensorInfo info_phase(shape_img, Format::U8);
- _phase.allocator()->init(info_phase);
-
- // Manage intermediate buffers
- _memory_group.manage(&_mag);
- _memory_group.manage(&_phase);
-
- // Initialise gradient kernel
- _gradient_kernel.configure(compile_context, input, &_mag, &_phase, phase_type, border_mode, constant_border_value);
-
- // Configure NETensor for the HOG space and orientation binning kernel
- for(size_t i = 0; i < _num_orient_bin_kernel; ++i)
- {
- const size_t idx_multi_hog = input_orient_bin[i];
-
- // Get the corresponding cell size and number of bins
- const Size2D &cell = multi_hog->model(idx_multi_hog)->info()->cell_size();
- const size_t num_bins = multi_hog->model(idx_multi_hog)->info()->num_bins();
-
- // Calculate number of cells along the x and y directions for the hog_space
- const size_t num_cells_x = width / cell.width;
- const size_t num_cells_y = height / cell.height;
-
- // TensorShape of hog space
- TensorShape shape_hog_space = input->info()->tensor_shape();
- shape_hog_space.set(Window::DimX, num_cells_x);
- shape_hog_space.set(Window::DimY, num_cells_y);
-
- // Allocate HOG space
- TensorInfo info_space(shape_hog_space, num_bins, DataType::F32);
- _hog_space[i].allocator()->init(info_space);
-
- // Manage intermediate buffers
- _memory_group.manage(&_hog_space[i]);
-
- // Initialise orientation binning kernel
- _orient_bin_kernel.emplace_back(std::make_unique<CLHOGOrientationBinningKernel>());
- _orient_bin_kernel.back()->configure(compile_context, &_mag, &_phase, &_hog_space[i], multi_hog->model(idx_multi_hog)->info());
- }
-
- // Allocate intermediate tensors
- _mag.allocator()->allocate();
- _phase.allocator()->allocate();
-
- // Configure CLTensor for the normalized HOG space and block normalization kernel
- for(size_t i = 0; i < _num_block_norm_kernel; ++i)
- {
- const size_t idx_multi_hog = input_block_norm[i].first;
- const size_t idx_orient_bin = input_block_norm[i].second;
-
- // Allocate normalized HOG space
- TensorInfo tensor_info(*(multi_hog->model(idx_multi_hog)->info()), width, height);
- _hog_norm_space[i].allocator()->init(tensor_info);
-
- // Manage intermediate buffers
- _memory_group.manage(&_hog_norm_space[i]);
-
- // Initialize block normalization kernel
- _block_norm_kernel.emplace_back(std::make_unique<CLHOGBlockNormalizationKernel>());
- _block_norm_kernel.back()->configure(compile_context, &_hog_space[idx_orient_bin], &_hog_norm_space[i], multi_hog->model(idx_multi_hog)->info());
- }
-
- // Allocate intermediate tensors
- for(size_t i = 0; i < _num_orient_bin_kernel; ++i)
- {
- _hog_space[i].allocator()->allocate();
- }
-
- detection_window_strides->map(CLScheduler::get().queue(), true);
-
- // Configure HOG detector kernel
- for(size_t i = 0; i < _num_hog_detect_kernel; ++i)
- {
- const size_t idx_block_norm = input_hog_detect[i];
-
- _hog_detect_kernel[i].configure(compile_context, &_hog_norm_space[idx_block_norm], multi_hog->cl_model(i), detection_windows, detection_window_strides->at(i), threshold, i);
- }
-
- detection_window_strides->unmap(CLScheduler::get().queue());
-
- // Configure non maxima suppression kernel
- _non_maxima_kernel.configure(_detection_windows, min_distance);
-
- // Allocate intermediate tensors
- for(size_t i = 0; i < _num_block_norm_kernel; ++i)
- {
- _hog_norm_space[i].allocator()->allocate();
- }
-}
-
-void CLHOGMultiDetection::run()
-{
- ARM_COMPUTE_ERROR_ON_MSG(_detection_windows == nullptr, "Unconfigured function");
-
- MemoryGroupResourceScope scope_mg(_memory_group);
-
- // Reset detection window
- _detection_windows->clear();
-
- // Run gradient
- _gradient_kernel.run();
-
- // Run orientation binning kernel
- for(size_t i = 0; i < _num_orient_bin_kernel; ++i)
- {
- CLScheduler::get().enqueue(*_orient_bin_kernel[i], false);
- }
-
- // Run block normalization kernel
- for(size_t i = 0; i < _num_block_norm_kernel; ++i)
- {
- CLScheduler::get().enqueue(*_block_norm_kernel[i], false);
- }
-
- // Run HOG detector kernel
- for(size_t i = 0; i < _num_hog_detect_kernel; ++i)
- {
- _hog_detect_kernel[i].run();
- }
-
- // Run non-maxima suppression kernel if enabled
- if(_non_maxima_suppression)
- {
- // Map detection windows array before computing non maxima suppression
- _detection_windows->map(CLScheduler::get().queue(), true);
- Scheduler::get().schedule(&_non_maxima_kernel, Window::DimY);
- _detection_windows->unmap(CLScheduler::get().queue());
- }
-}
diff --git a/src/runtime/CL/functions/CLHarrisCorners.cpp b/src/runtime/CL/functions/CLHarrisCorners.cpp
deleted file mode 100644
index 37f428c677..0000000000
--- a/src/runtime/CL/functions/CLHarrisCorners.cpp
+++ /dev/null
@@ -1,198 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLHarrisCorners.h"
-
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "arm_compute/runtime/CL/functions/CLSobel3x3.h"
-#include "arm_compute/runtime/CL/functions/CLSobel5x5.h"
-#include "arm_compute/runtime/CL/functions/CLSobel7x7.h"
-#include "arm_compute/runtime/ITensorAllocator.h"
-#include "arm_compute/runtime/Scheduler.h"
-#include "src/core/CL/kernels/CLFillBorderKernel.h"
-#include "src/core/CL/kernels/CLHarrisCornersKernel.h"
-#include "src/core/CL/kernels/CLSobel5x5Kernel.h"
-#include "src/core/CL/kernels/CLSobel7x7Kernel.h"
-
-#include <cmath>
-#include <utility>
-
-using namespace arm_compute;
-
-CLHarrisCorners::CLHarrisCorners(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
- : _memory_group(std::move(memory_manager)),
- _sobel(nullptr),
- _harris_score(std::make_unique<CLHarrisScoreKernel>()),
- _non_max_suppr(),
- _candidates(),
- _sort_euclidean(),
- _border_gx(std::make_unique<CLFillBorderKernel>()),
- _border_gy(std::make_unique<CLFillBorderKernel>()),
- _gx(),
- _gy(),
- _score(),
- _nonmax(),
- _corners_list(),
- _num_corner_candidates(0),
- _corners(nullptr)
-{
-}
-
-CLHarrisCorners::~CLHarrisCorners() = default;
-
-void CLHarrisCorners::configure(ICLImage *input, float threshold, float min_dist,
- float sensitivity, int32_t gradient_size, int32_t block_size, ICLKeyPointArray *corners,
- BorderMode border_mode, uint8_t constant_border_value, bool use_fp16)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, threshold, min_dist, sensitivity, gradient_size, block_size, corners, border_mode, constant_border_value, use_fp16);
-}
-
-void CLHarrisCorners::configure(const CLCompileContext &compile_context, ICLImage *input, float threshold, float min_dist,
- float sensitivity, int32_t gradient_size, int32_t block_size, ICLKeyPointArray *corners,
- BorderMode border_mode, uint8_t constant_border_value, bool use_fp16)
-{
- ARM_COMPUTE_UNUSED(use_fp16); //TODO(COMPMID-772): Add half float support
- ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON(!(block_size == 3 || block_size == 5 || block_size == 7));
- ARM_COMPUTE_ERROR_ON(nullptr == corners);
-
- _corners = corners;
-
- const TensorShape shape = input->info()->tensor_shape();
- const DataType dt = (gradient_size < 7) ? DataType::S16 : DataType::S32;
- TensorInfo tensor_info(shape, 1, dt);
-
- _gx.allocator()->init(tensor_info);
- _gy.allocator()->init(tensor_info);
-
- TensorInfo info_f32(shape, 1, DataType::F32);
- _score.allocator()->init(info_f32);
- _nonmax.allocator()->init(info_f32);
-
- _corners_list.resize(shape.x() * shape.y());
-
- // Manage intermediate buffers
- _memory_group.manage(&_gx);
- _memory_group.manage(&_gy);
-
- /* Set/init Sobel kernel accordingly with gradient_size */
- switch(gradient_size)
- {
- case 3:
- {
- auto k = std::make_unique<CLSobel3x3>();
- k->configure(compile_context, input, &_gx, &_gy, border_mode, constant_border_value);
- _sobel = std::move(k);
- break;
- }
- case 5:
- {
- auto k = std::make_unique<CLSobel5x5>();
- k->configure(compile_context, input, &_gx, &_gy, border_mode, constant_border_value);
- _sobel = std::move(k);
- break;
- }
- case 7:
- {
- auto k = std::make_unique<CLSobel7x7>();
- k->configure(compile_context, input, &_gx, &_gy, border_mode, constant_border_value);
- _sobel = std::move(k);
- break;
- }
- default:
- ARM_COMPUTE_ERROR("Gradient size not implemented");
- }
-
- // Normalization factor
- const float norm_factor = 1.0f / (255.0f * pow(4.0f, gradient_size / 2) * block_size);
- const float pow4_normalization_factor = pow(norm_factor, 4);
-
- // Manage intermediate buffers
- _memory_group.manage(&_score);
-
- // Set/init Harris Score kernel accordingly with block_size
- _harris_score->configure(compile_context, &_gx, &_gy, &_score, block_size, pow4_normalization_factor, threshold, sensitivity, border_mode == BorderMode::UNDEFINED);
-
- // Configure border filling using harris score kernel's block size
- _border_gx->configure(compile_context, &_gx, _harris_score->border_size(), border_mode, PixelValue(constant_border_value));
- _border_gy->configure(compile_context, &_gy, _harris_score->border_size(), border_mode, PixelValue(constant_border_value));
-
- // Allocate intermediate buffers
- _gx.allocator()->allocate();
- _gy.allocator()->allocate();
-
- // Manage intermediate buffers
- _memory_group.manage(&_nonmax);
-
- // Init non-maxima suppression function
- _non_max_suppr.configure(compile_context, &_score, &_nonmax, border_mode);
-
- // Allocate intermediate buffers
- _score.allocator()->allocate();
-
- // Init corner candidates kernel
- _candidates.configure(&_nonmax, _corners_list.data(), &_num_corner_candidates);
-
- // Allocate intermediate buffers
- _nonmax.allocator()->allocate();
-
- // Init euclidean distance
- _sort_euclidean.configure(_corners_list.data(), _corners, &_num_corner_candidates, min_dist);
-}
-
-void CLHarrisCorners::run()
-{
- ARM_COMPUTE_ERROR_ON_MSG(_sobel == nullptr, "Unconfigured function");
-
- MemoryGroupResourceScope scope_mg(_memory_group);
-
- // Init to 0 number of corner candidates
- _num_corner_candidates = 0;
-
- // Run Sobel kernel
- _sobel->run();
-
- // Fill border before harris score kernel
- CLScheduler::get().enqueue(*_border_gx, false);
- CLScheduler::get().enqueue(*_border_gy, false);
-
- // Run harris score kernel
- CLScheduler::get().enqueue(*_harris_score, false);
-
- // Run non-maxima suppression
- _non_max_suppr.run();
-
- // Run corner candidate kernel
- _nonmax.map(true);
- Scheduler::get().schedule(&_candidates, Window::DimY);
- _nonmax.unmap();
-
- _corners->map(CLScheduler::get().queue(), true);
- Scheduler::get().schedule(&_sort_euclidean, Window::DimY);
- _corners->unmap(CLScheduler::get().queue());
-}
diff --git a/src/runtime/CL/functions/CLHistogram.cpp b/src/runtime/CL/functions/CLHistogram.cpp
deleted file mode 100644
index f278cf0dc2..0000000000
--- a/src/runtime/CL/functions/CLHistogram.cpp
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLHistogram.h"
-
-#include "arm_compute/runtime/CL/CLScheduler.h"
-
-using namespace arm_compute;
-
-CLHistogram::CLHistogram()
- : _kernel(), _kernel_border()
-{
-}
-
-void CLHistogram::configure(const ICLImage *input, ICLDistribution1D *output)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output);
-}
-
-void CLHistogram::configure(const CLCompileContext &compile_context, const ICLImage *input, ICLDistribution1D *output)
-{
- _kernel.configure(compile_context, input, output);
- _kernel_border.configure(compile_context, input, output);
-}
-
-void CLHistogram::run()
-{
- CLScheduler::get().enqueue(_kernel, false);
- CLScheduler::get().enqueue(_kernel_border);
-}
diff --git a/src/runtime/CL/functions/CLIntegralImage.cpp b/src/runtime/CL/functions/CLIntegralImage.cpp
deleted file mode 100644
index 56a151a085..0000000000
--- a/src/runtime/CL/functions/CLIntegralImage.cpp
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLIntegralImage.h"
-
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "src/core/CL/kernels/CLIntegralImageKernel.h"
-
-using namespace arm_compute;
-
-CLIntegralImage::CLIntegralImage()
- : _integral_hor(std::make_unique<CLIntegralImageHorKernel>()),
- _integral_vert(std::make_unique<CLIntegralImageVertKernel>())
-{
-}
-
-CLIntegralImage::~CLIntegralImage() = default;
-
-void CLIntegralImage::configure(const ICLTensor *input, ICLTensor *output)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output);
-}
-
-void CLIntegralImage::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
-{
- _integral_hor->configure(compile_context, input, output);
- _integral_vert->configure(compile_context, output);
-}
-
-void CLIntegralImage::run()
-{
- CLScheduler::get().enqueue(*_integral_hor, false);
- CLScheduler::get().enqueue(*_integral_vert);
-}
diff --git a/src/runtime/CL/functions/CLLaplacianPyramid.cpp b/src/runtime/CL/functions/CLLaplacianPyramid.cpp
deleted file mode 100644
index 1ad19e56ea..0000000000
--- a/src/runtime/CL/functions/CLLaplacianPyramid.cpp
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLLaplacianPyramid.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/IPyramid.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/functions/CLDepthConvertLayer.h"
-#include "arm_compute/runtime/CL/functions/CLElementwiseOperations.h"
-#include "arm_compute/runtime/CL/functions/CLGaussian5x5.h"
-#include "arm_compute/runtime/CL/functions/CLGaussianPyramid.h"
-#include "src/core/CL/kernels/CLFillBorderKernel.h"
-#include "src/core/CL/kernels/CLGaussian5x5Kernel.h"
-#include "src/core/CL/kernels/CLGaussianPyramidKernel.h"
-
-using namespace arm_compute;
-
-CLLaplacianPyramid::CLLaplacianPyramid() // NOLINT
- : _num_levels(0),
- _gaussian_pyr_function(),
- _convf(),
- _subf(),
- _depth_function(),
- _gauss_pyr(),
- _conv_pyr()
-{
-}
-
-void CLLaplacianPyramid::configure(ICLTensor *input, CLPyramid *pyramid, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, pyramid, output, border_mode, constant_border_value);
-}
-
-void CLLaplacianPyramid::configure(const CLCompileContext &compile_context, ICLTensor *input, CLPyramid *pyramid, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value)
-{
- ARM_COMPUTE_ERROR_ON(nullptr == pyramid);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::S16);
- ARM_COMPUTE_ERROR_ON(0 == pyramid->info()->num_levels());
- ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != pyramid->info()->width());
- ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) != pyramid->info()->height());
- ARM_COMPUTE_ERROR_ON(output->info()->dimension(0) != pyramid->get_pyramid_level(pyramid->info()->num_levels() - 1)->info()->dimension(0));
- ARM_COMPUTE_ERROR_ON(output->info()->dimension(1) != pyramid->get_pyramid_level(pyramid->info()->num_levels() - 1)->info()->dimension(1));
-
- _num_levels = pyramid->info()->num_levels();
-
- // Create and initialize the gaussian pyramid and the convoluted pyramid
- PyramidInfo pyramid_info;
- pyramid_info.init(_num_levels, 0.5f, pyramid->info()->tensor_shape(), arm_compute::Format::U8);
-
- _gauss_pyr.init(pyramid_info);
- _conv_pyr.init(pyramid_info);
-
- // Create Gaussian Pyramid function
- _gaussian_pyr_function.configure(compile_context, input, &_gauss_pyr, border_mode, constant_border_value);
-
- _convf.resize(_num_levels);
- _subf.resize(_num_levels);
-
- for(unsigned int i = 0; i < _num_levels; ++i)
- {
- _convf[i].configure(compile_context, _gauss_pyr.get_pyramid_level(i), _conv_pyr.get_pyramid_level(i), border_mode, constant_border_value);
- _subf[i].configure(compile_context, _gauss_pyr.get_pyramid_level(i), _conv_pyr.get_pyramid_level(i), pyramid->get_pyramid_level(i), ConvertPolicy::WRAP);
- }
-
- _depth_function.configure(compile_context, _conv_pyr.get_pyramid_level(_num_levels - 1), output, ConvertPolicy::WRAP, 0);
-
- _gauss_pyr.allocate();
- _conv_pyr.allocate();
-}
-
-void CLLaplacianPyramid::run()
-{
- ARM_COMPUTE_ERROR_ON_MSG(0 == _num_levels, "Unconfigured function");
-
- _gaussian_pyr_function.run(); // compute gaussian pyramid
-
- for(unsigned int i = 0; i < _num_levels; ++i)
- {
- _convf[i].run(); // convolute gaussian pyramid
- }
-
- for(unsigned int i = 0; i < _num_levels; ++i)
- {
- _subf[i].run(); // compute laplacian image
- }
-
- _depth_function.run();
-}
diff --git a/src/runtime/CL/functions/CLLaplacianReconstruct.cpp b/src/runtime/CL/functions/CLLaplacianReconstruct.cpp
deleted file mode 100644
index d7fd81754b..0000000000
--- a/src/runtime/CL/functions/CLLaplacianReconstruct.cpp
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLLaplacianReconstruct.h"
-
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/IPyramid.h"
-#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Validate.h"
-#include "src/core/CL/kernels/CLFillBorderKernel.h"
-
-#include <cstddef>
-
-using namespace arm_compute;
-
-CLLaplacianReconstruct::CLLaplacianReconstruct() // NOLINT
- : _tmp_pyr(),
- _addf(),
- _scalef(),
- _depthf()
-{
-}
-
-void CLLaplacianReconstruct::configure(const CLPyramid *pyramid, ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value)
-{
- configure(CLKernelLibrary::get().get_compile_context(), pyramid, input, output, border_mode, constant_border_value);
-}
-
-void CLLaplacianReconstruct::configure(const CLCompileContext &compile_context, const CLPyramid *pyramid, ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value)
-{
- ARM_COMPUTE_ERROR_ON(nullptr == pyramid);
- ARM_COMPUTE_ERROR_ON(input == output);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S16);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON(input->info()->num_dimensions() != pyramid->get_pyramid_level(0)->info()->num_dimensions());
- ARM_COMPUTE_ERROR_ON(output->info()->num_dimensions() != pyramid->get_pyramid_level(0)->info()->num_dimensions());
- ARM_COMPUTE_ERROR_ON(output->info()->dimension(0) != pyramid->get_pyramid_level(0)->info()->dimension(0));
- ARM_COMPUTE_ERROR_ON(output->info()->dimension(1) != pyramid->get_pyramid_level(0)->info()->dimension(1));
- ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != pyramid->get_pyramid_level(pyramid->info()->num_levels() - 1)->info()->dimension(0));
- ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) != pyramid->get_pyramid_level(pyramid->info()->num_levels() - 1)->info()->dimension(1));
-
- const size_t num_levels = pyramid->info()->num_levels();
-
- // Create and initialize the tmp pyramid: I(n-2) = upsample( input + Laplace(n-1) )
- PyramidInfo pyramid_info;
- pyramid_info.init(num_levels, 0.5f, output->info()->tensor_shape(), arm_compute::Format::S16);
- _tmp_pyr.init(pyramid_info);
-
- // Allocate add and scale functions. Level 0 does not need to be scaled.
- _addf.resize(num_levels);
- _scalef.resize(num_levels - 1);
-
- const size_t last_level = num_levels - 1;
-
- _addf[last_level].configure(compile_context, input, pyramid->get_pyramid_level(last_level), _tmp_pyr.get_pyramid_level(last_level), ConvertPolicy::SATURATE);
-
- // Scale levels n-1 to 1, and add levels n-2 to 0
- for(size_t l = 0; l < last_level; ++l)
- {
- _scalef[l].configure(compile_context, _tmp_pyr.get_pyramid_level(l + 1), _tmp_pyr.get_pyramid_level(l), ScaleKernelInfo{ arm_compute::InterpolationPolicy::NEAREST_NEIGHBOR, border_mode, constant_border_value });
- _addf[l].configure(compile_context, _tmp_pyr.get_pyramid_level(l), pyramid->get_pyramid_level(l), _tmp_pyr.get_pyramid_level(l), ConvertPolicy::SATURATE);
- }
-
- // Convert level 0 from S16 to U8
- _depthf.configure(compile_context, _tmp_pyr.get_pyramid_level(0), output, ConvertPolicy::SATURATE, 0);
-
- _tmp_pyr.allocate();
-}
-
-void CLLaplacianReconstruct::run()
-{
- ARM_COMPUTE_ERROR_ON_MSG(_addf.empty(), "Unconfigured function");
-
- const size_t last_level = _tmp_pyr.info()->num_levels() - 1;
-
- _addf[last_level].run();
-
- // Run l = [last_level - 1, 0]
- for(size_t l = last_level; l-- > 0;)
- {
- _scalef[l].run();
- _addf[l].run();
- }
-
- _depthf.run();
-}
diff --git a/src/runtime/CL/functions/CLMagnitude.cpp b/src/runtime/CL/functions/CLMagnitude.cpp
deleted file mode 100644
index 0599a11fa1..0000000000
--- a/src/runtime/CL/functions/CLMagnitude.cpp
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLMagnitude.h"
-
-#include "src/core/CL/kernels/CLMagnitudePhaseKernel.h"
-
-#include <utility>
-
-using namespace arm_compute;
-
-void CLMagnitude::configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, MagnitudeType mag_type)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output, mag_type);
-}
-
-void CLMagnitude::configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, MagnitudeType mag_type)
-{
- auto k = std::make_unique<CLMagnitudePhaseKernel>();
- k->configure(compile_context, input1, input2, output, nullptr, mag_type);
- _kernel = std::move(k);
-}
diff --git a/src/runtime/CL/functions/CLMeanStdDev.cpp b/src/runtime/CL/functions/CLMeanStdDev.cpp
deleted file mode 100644
index d8cd41d45f..0000000000
--- a/src/runtime/CL/functions/CLMeanStdDev.cpp
+++ /dev/null
@@ -1,177 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/TensorInfo.h"
-
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "arm_compute/runtime/CL/functions/CLMeanStdDev.h"
-#include "src/core/CL/kernels/CLFillBorderKernel.h"
-#include "src/core/CL/kernels/CLMeanStdDevKernel.h"
-#include "src/core/CL/kernels/CLReductionOperationKernel.h"
-
-using namespace arm_compute;
-
-CLMeanStdDev::CLMeanStdDev(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
- : _memory_group(std::move(memory_manager)),
- _data_type(),
- _num_pixels(),
- _run_stddev(),
- _reduction_operation_mean(),
- _reduction_operation_stddev(),
- _reduction_output_mean(),
- _reduction_output_stddev(),
- _mean(nullptr),
- _stddev(nullptr),
- _mean_stddev_kernel(std::make_unique<CLMeanStdDevKernel>()),
- _fill_border_kernel(std::make_unique<CLFillBorderKernel>()),
- _global_sum(),
- _global_sum_squared()
-{
-}
-
-CLMeanStdDev::~CLMeanStdDev() = default;
-
-Status CLMeanStdDev::validate(ITensorInfo *input, float *mean, float *stddev)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_TENSOR_NOT_2D(input);
- if(is_data_type_float(input->data_type()))
- {
- ARM_COMPUTE_UNUSED(mean);
- ARM_COMPUTE_UNUSED(stddev);
-
- TensorShape output_shape = TensorShape{ 1, input->dimension(1) };
- TensorInfo output_shape_info = TensorInfo(output_shape, 1, DataType::U8);
- return CLReductionOperation::validate(input, &output_shape_info, 0, ReductionOperation::SUM);
- }
- else
- {
- return CLMeanStdDevKernel::validate(input, mean, nullptr, stddev, nullptr);
- }
-}
-
-void CLMeanStdDev::configure(ICLImage *input, float *mean, float *stddev)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, mean, stddev);
-}
-
-void CLMeanStdDev::configure(const CLCompileContext &compile_context, ICLImage *input, float *mean, float *stddev)
-{
- // In the case of F16/F32 we call reduction operation for calculating CLMeanStdDev
- _data_type = input->info()->data_type();
-
- if(is_data_type_float(_data_type))
- {
- _num_pixels = input->info()->dimension(0) * input->info()->dimension(1);
-
- _memory_group.manage(&_reduction_output_mean);
- _reduction_operation_mean.configure(compile_context, input, &_reduction_output_mean, 0, ReductionOperation::SUM);
- _reduction_output_mean.allocator()->allocate();
- _mean = mean;
-
- if(stddev != nullptr)
- {
- _memory_group.manage(&_reduction_output_stddev);
- _reduction_operation_stddev.configure(compile_context, input, &_reduction_output_stddev, 0, ReductionOperation::SUM_SQUARE);
- _reduction_output_stddev.allocator()->allocate();
- _stddev = stddev;
- _run_stddev = true;
- }
- }
- else
- {
- _global_sum = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_ulong));
-
- if(stddev != nullptr)
- {
- _global_sum_squared = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_ulong));
- }
-
- _mean_stddev_kernel->configure(compile_context, input, mean, &_global_sum, stddev, &_global_sum_squared);
- _fill_border_kernel->configure(compile_context, input, _mean_stddev_kernel->border_size(), BorderMode::CONSTANT, PixelValue(static_cast<uint8_t>(0)));
- }
-}
-
-template <typename T>
-void CLMeanStdDev::run_float()
-{
- MemoryGroupResourceScope scope_mg(_memory_group);
-
- // Perform reduction on x-axis
- _reduction_operation_mean.run();
- if(_run_stddev)
- {
- _reduction_operation_stddev.run();
- _reduction_output_stddev.map(true);
- }
-
- _reduction_output_mean.map(true);
-
- auto mean = static_cast<T>(0);
-
- // Calculate final result for mean
- for(unsigned int i = 0; i < _reduction_output_mean.info()->dimension(1); ++i)
- {
- mean += *reinterpret_cast<T *>(_reduction_output_mean.buffer() + _reduction_output_mean.info()->offset_element_in_bytes(Coordinates(0, i)));
- }
-
- mean /= _num_pixels;
- *_mean = mean;
-
- if(_run_stddev)
- {
- auto stddev = static_cast<T>(0);
- // Calculate final result for stddev
- for(unsigned int i = 0; i < _reduction_output_stddev.info()->dimension(1); ++i)
- {
- stddev += *reinterpret_cast<T *>(_reduction_output_stddev.buffer() + _reduction_output_stddev.info()->offset_element_in_bytes(Coordinates(0, i)));
- }
- *_stddev = std::sqrt((stddev / _num_pixels) - (mean * mean));
-
- _reduction_output_stddev.unmap();
- }
- _reduction_output_mean.unmap();
-}
-
-void CLMeanStdDev::run_int()
-{
- CLScheduler::get().enqueue(*_fill_border_kernel);
- CLScheduler::get().enqueue(*_mean_stddev_kernel);
-}
-
-void CLMeanStdDev::run()
-{
- switch(_data_type)
- {
- case DataType::F16:
- run_float<half>();
- break;
- case DataType::F32:
- run_float<float>();
- break;
- case DataType::U8:
- run_int();
- break;
- default:
- ARM_COMPUTE_ERROR_ON("Not supported");
- }
-}
diff --git a/src/runtime/CL/functions/CLMedian3x3.cpp b/src/runtime/CL/functions/CLMedian3x3.cpp
deleted file mode 100644
index b32063a8fe..0000000000
--- a/src/runtime/CL/functions/CLMedian3x3.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLMedian3x3.h"
-
-#include "arm_compute/core/PixelValue.h"
-#include "src/core/CL/kernels/CLFillBorderKernel.h"
-#include "src/core/CL/kernels/CLMedian3x3Kernel.h"
-
-#include <utility>
-
-using namespace arm_compute;
-
-void CLMedian3x3::configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output, border_mode, constant_border_value);
-}
-
-void CLMedian3x3::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value)
-{
- auto k = std::make_unique<CLMedian3x3Kernel>();
- k->configure(compile_context, input, output, border_mode == BorderMode::UNDEFINED);
- _kernel = std::move(k);
- _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
-}
diff --git a/src/runtime/CL/functions/CLMinMaxLocation.cpp b/src/runtime/CL/functions/CLMinMaxLocation.cpp
deleted file mode 100644
index ace6a1cb21..0000000000
--- a/src/runtime/CL/functions/CLMinMaxLocation.cpp
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLMinMaxLocation.h"
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "src/core/CL/kernels/CLMinMaxLocationKernel.h"
-
-namespace arm_compute
-{
-CLMinMaxLocation::CLMinMaxLocation()
- : _min_max_kernel(std::make_unique<CLMinMaxKernel>()),
- _min_max_loc_kernel(std::make_unique<CLMinMaxLocationKernel>()),
- _min_max_vals(),
- _min_max_count_vals(),
- _min(nullptr),
- _max(nullptr),
- _min_count(nullptr),
- _max_count(nullptr),
- _min_loc(nullptr),
- _max_loc(nullptr)
-{
-}
-
-CLMinMaxLocation::~CLMinMaxLocation() = default;
-
-void CLMinMaxLocation::configure(const ICLImage *input, void *min, void *max, CLCoordinates2DArray *min_loc, CLCoordinates2DArray *max_loc, uint32_t *min_count, uint32_t *max_count)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, min, max, min_loc, max_loc, min_count, max_count);
-}
-
-void CLMinMaxLocation::configure(const CLCompileContext &compile_context, const ICLImage *input, void *min, void *max, CLCoordinates2DArray *min_loc, CLCoordinates2DArray *max_loc,
- uint32_t *min_count,
- uint32_t *max_count)
-{
- ARM_COMPUTE_ERROR_ON(nullptr == min);
- ARM_COMPUTE_ERROR_ON(nullptr == max);
-
- _min_max_vals = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, 2 * sizeof(int32_t));
- _min_max_count_vals = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, 2 * sizeof(uint32_t));
- _min = min;
- _max = max;
- _min_count = min_count;
- _max_count = max_count;
- _min_loc = min_loc;
- _max_loc = max_loc;
-
- _min_max_kernel->configure(compile_context, input, &_min_max_vals);
- _min_max_loc_kernel->configure(compile_context, input, &_min_max_vals, &_min_max_count_vals, _min_loc, _max_loc);
-}
-
-void CLMinMaxLocation::run()
-{
- cl::CommandQueue q = CLScheduler::get().queue();
-
- CLScheduler::get().enqueue(*_min_max_kernel, false);
- CLScheduler::get().enqueue(*_min_max_loc_kernel, false);
-
- // Update min and max
- q.enqueueReadBuffer(_min_max_vals, CL_FALSE, 0 * sizeof(int32_t), sizeof(int32_t), static_cast<int32_t *>(_min));
- q.enqueueReadBuffer(_min_max_vals, CL_FALSE, 1 * sizeof(int32_t), sizeof(int32_t), static_cast<int32_t *>(_max));
-
- // Update min and max count
- if(_min_count != nullptr)
- {
- q.enqueueReadBuffer(_min_max_count_vals, CL_FALSE, 0 * sizeof(uint32_t), sizeof(uint32_t), _min_count);
- }
- if(_max_count != nullptr)
- {
- q.enqueueReadBuffer(_min_max_count_vals, CL_FALSE, 1 * sizeof(uint32_t), sizeof(uint32_t), _max_count);
- }
-
- // Update min/max point arrays (Makes the kernel blocking)
- if(_min_loc != nullptr)
- {
- unsigned int min_count = 0;
- q.enqueueReadBuffer(_min_max_count_vals, CL_TRUE, 0 * sizeof(uint32_t), sizeof(uint32_t), &min_count);
- size_t min_corner_size = std::min(static_cast<size_t>(min_count), _min_loc->max_num_values());
- _min_loc->resize(min_corner_size);
- }
- if(_max_loc != nullptr)
- {
- unsigned int max_count = 0;
- q.enqueueReadBuffer(_min_max_count_vals, CL_TRUE, 1 * sizeof(uint32_t), sizeof(uint32_t), &max_count);
- size_t max_corner_size = std::min(static_cast<size_t>(max_count), _max_loc->max_num_values());
- _max_loc->resize(max_corner_size);
- }
-}
-} // namespace arm_compute
diff --git a/src/runtime/CL/functions/CLNonLinearFilter.cpp b/src/runtime/CL/functions/CLNonLinearFilter.cpp
deleted file mode 100644
index ec88f879b7..0000000000
--- a/src/runtime/CL/functions/CLNonLinearFilter.cpp
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLNonLinearFilter.h"
-
-#include "src/core/CL/kernels/CLFillBorderKernel.h"
-#include "src/core/CL/kernels/CLNonLinearFilterKernel.h"
-
-#include <utility>
-
-using namespace arm_compute;
-
-void CLNonLinearFilter::configure(ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask,
- BorderMode border_mode, uint8_t constant_border_value)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output, function, mask_size, pattern, mask, border_mode, constant_border_value);
-}
-
-void CLNonLinearFilter::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern,
- const uint8_t *mask, BorderMode border_mode, uint8_t constant_border_value)
-{
- auto k = std::make_unique<CLNonLinearFilterKernel>();
- k->configure(compile_context, input, output, function, mask_size, pattern, mask, border_mode == BorderMode::UNDEFINED);
- _kernel = std::move(k);
- _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
-}
diff --git a/src/runtime/CL/functions/CLNonMaximaSuppression3x3.cpp b/src/runtime/CL/functions/CLNonMaximaSuppression3x3.cpp
deleted file mode 100644
index 5906ea5a4b..0000000000
--- a/src/runtime/CL/functions/CLNonMaximaSuppression3x3.cpp
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h"
-
-#include "src/core/CL/kernels/CLFillBorderKernel.h"
-#include "src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h"
-
-#include <utility>
-
-using namespace arm_compute;
-
-void CLNonMaximaSuppression3x3::configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output, border_mode);
-}
-
-void CLNonMaximaSuppression3x3::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, BorderMode border_mode)
-{
- auto k = std::make_unique<CLNonMaximaSuppression3x3Kernel>();
- k->configure(compile_context, input, output, border_mode == BorderMode::UNDEFINED);
- _kernel = std::move(k);
-
- if(border_mode != BorderMode::UNDEFINED)
- {
- _border_handler->configure(compile_context, input, _kernel->border_size(), BorderMode::CONSTANT);
- }
- else
- {
- _border_handler->configure(compile_context, input, _kernel->border_size(), BorderMode::UNDEFINED);
- }
-}
diff --git a/src/runtime/CL/functions/CLOpticalFlow.cpp b/src/runtime/CL/functions/CLOpticalFlow.cpp
deleted file mode 100644
index 76e0ac5f0b..0000000000
--- a/src/runtime/CL/functions/CLOpticalFlow.cpp
+++ /dev/null
@@ -1,184 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLOpticalFlow.h"
-
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Window.h"
-#include "arm_compute/runtime/CL/CLPyramid.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "arm_compute/runtime/CL/functions/CLScharr3x3.h"
-#include "src/core/CL/kernels/CLFillBorderKernel.h"
-#include "src/core/CL/kernels/CLLKTrackerKernel.h"
-
-using namespace arm_compute;
-
-CLOpticalFlow::CLOpticalFlow(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
- : _memory_group(std::move(memory_manager)),
- _tracker_init_kernel(),
- _tracker_stage0_kernel(),
- _tracker_stage1_kernel(),
- _tracker_finalize_kernel(std::make_unique<CLLKTrackerFinalizeKernel>()),
- _func_scharr(),
- _scharr_gx(),
- _scharr_gy(),
- _old_points(nullptr),
- _new_points_estimates(nullptr),
- _new_points(nullptr),
- _old_points_internal(),
- _new_points_internal(),
- _coefficient_table(),
- _old_values(),
- _num_levels(0)
-{
-}
-
-CLOpticalFlow::~CLOpticalFlow() = default;
-
-void CLOpticalFlow::configure(const CLPyramid *old_pyramid, const CLPyramid *new_pyramid,
- const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates, ICLKeyPointArray *new_points,
- Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, bool use_initial_estimate,
- BorderMode border_mode, uint8_t constant_border_value)
-{
- configure(CLKernelLibrary::get().get_compile_context(), old_pyramid, new_pyramid, old_points, new_points_estimates, new_points, termination, epsilon, num_iterations, window_dimension,
- use_initial_estimate, border_mode, constant_border_value);
-}
-
-void CLOpticalFlow::configure(const CLCompileContext &compile_context, const CLPyramid *old_pyramid, const CLPyramid *new_pyramid,
- const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates, ICLKeyPointArray *new_points,
- Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, bool use_initial_estimate,
- BorderMode border_mode, uint8_t constant_border_value)
-{
- ARM_COMPUTE_ERROR_ON(nullptr == old_pyramid);
- ARM_COMPUTE_ERROR_ON(nullptr == new_pyramid);
- ARM_COMPUTE_ERROR_ON(nullptr == old_points);
- ARM_COMPUTE_ERROR_ON(nullptr == new_points_estimates);
- ARM_COMPUTE_ERROR_ON(nullptr == new_points);
- ARM_COMPUTE_ERROR_ON(old_pyramid->info()->num_levels() != new_pyramid->info()->num_levels());
- ARM_COMPUTE_ERROR_ON(0 == old_pyramid->info()->num_levels());
- ARM_COMPUTE_ERROR_ON(old_pyramid->info()->width() != new_pyramid->info()->width());
- ARM_COMPUTE_ERROR_ON(old_pyramid->info()->height() != new_pyramid->info()->height());
- ARM_COMPUTE_ERROR_ON(use_initial_estimate && old_points->num_values() != new_points_estimates->num_values());
-
- // Set member variables
- _old_points = old_points;
- _new_points_estimates = new_points_estimates;
- _new_points = new_points;
- _num_levels = old_pyramid->info()->num_levels();
-
- const float pyr_scale = old_pyramid->info()->scale();
- const int list_length = old_points->num_values();
- const int old_values_list_length = list_length * window_dimension * window_dimension;
-
- // Create kernels and tensors
- _tracker_init_kernel.reserve(_num_levels);
- _tracker_stage0_kernel.reserve(_num_levels);
- _tracker_stage1_kernel.reserve(_num_levels);
- _func_scharr.resize(_num_levels);
- _scharr_gx.resize(_num_levels);
- _scharr_gy.resize(_num_levels);
-
- // Create internal keypoint arrays
- _old_points_internal = std::make_unique<CLLKInternalKeypointArray>(list_length);
- _old_points_internal->resize(list_length);
- _new_points_internal = std::make_unique<CLLKInternalKeypointArray>(list_length);
- _new_points_internal->resize(list_length);
- _coefficient_table = std::make_unique<CLCoefficientTableArray>(list_length);
- _coefficient_table->resize(list_length);
- _old_values = std::make_unique<CLOldValueArray>(old_values_list_length);
- _old_values->resize(old_values_list_length);
- _new_points->resize(list_length);
-
- for(size_t i = 0; i < _num_levels; ++i)
- {
- // Get images from the ith level of old and right pyramid
- ICLImage *old_ith_input = old_pyramid->get_pyramid_level(i);
- ICLImage *new_ith_input = new_pyramid->get_pyramid_level(i);
-
- // Get width and height of images
- const unsigned int width_ith = old_ith_input->info()->dimension(0);
- const unsigned int height_ith = new_ith_input->info()->dimension(1);
-
- // Initialize Scharr tensors
- TensorInfo tensor_info(TensorShape(width_ith, height_ith), 1, DataType::S16);
- _scharr_gx[i].allocator()->init(tensor_info);
- _scharr_gy[i].allocator()->init(tensor_info);
-
- // Manage intermediate buffers
- _memory_group.manage(&_scharr_gx[i]);
- _memory_group.manage(&_scharr_gy[i]);
-
- // Init Scharr kernel
- _func_scharr[i].configure(compile_context, old_ith_input, &_scharr_gx[i], &_scharr_gy[i], border_mode, constant_border_value);
-
- // Init Lucas-Kanade init kernel
- _tracker_init_kernel.emplace_back(std::make_unique<CLLKTrackerInitKernel>());
- _tracker_init_kernel.back()->configure(compile_context, old_points, new_points_estimates, _old_points_internal.get(), _new_points_internal.get(), use_initial_estimate, i, _num_levels, pyr_scale);
-
- // Init Lucas-Kanade stage0 kernel
- _tracker_stage0_kernel.emplace_back(std::make_unique<CLLKTrackerStage0Kernel>());
- _tracker_stage0_kernel.back()->configure(compile_context, old_ith_input, &_scharr_gx[i], &_scharr_gy[i],
- _old_points_internal.get(), _new_points_internal.get(), _coefficient_table.get(), _old_values.get(),
- window_dimension, i);
-
- // Init Lucas-Kanade stage1 kernel
- _tracker_stage1_kernel.emplace_back(std::make_unique<CLLKTrackerStage1Kernel>());
- _tracker_stage1_kernel.back()->configure(compile_context, new_ith_input, _new_points_internal.get(), _coefficient_table.get(), _old_values.get(),
- termination, epsilon, num_iterations, window_dimension, i);
-
- // Allocate intermediate buffers
- _scharr_gx[i].allocator()->allocate();
- _scharr_gy[i].allocator()->allocate();
- }
-
- // Finalize Lucas-Kanade
- _tracker_finalize_kernel->configure(compile_context, _new_points_internal.get(), new_points);
-}
-
-void CLOpticalFlow::run()
-{
- ARM_COMPUTE_ERROR_ON_MSG(_num_levels == 0, "Unconfigured function");
-
- MemoryGroupResourceScope scope_mg(_memory_group);
-
- for(unsigned int level = _num_levels; level > 0; --level)
- {
- // Run Scharr kernel
- _func_scharr[level - 1].run();
-
- // Run Lucas-Kanade init kernel
- CLScheduler::get().enqueue(*_tracker_init_kernel[level - 1]);
-
- // Run Lucas-Kanade stage0 kernel
- CLScheduler::get().enqueue(*_tracker_stage0_kernel[level - 1]);
-
- // Run Lucas-Kanade stage1 kernel
- CLScheduler::get().enqueue(*_tracker_stage1_kernel[level - 1]);
- }
-
- CLScheduler::get().enqueue(*_tracker_finalize_kernel, true);
-}
diff --git a/src/runtime/CL/functions/CLPhase.cpp b/src/runtime/CL/functions/CLPhase.cpp
deleted file mode 100644
index b2ff5d05ca..0000000000
--- a/src/runtime/CL/functions/CLPhase.cpp
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLPhase.h"
-
-#include "src/core/CL/kernels/CLMagnitudePhaseKernel.h"
-
-#include <utility>
-
-using namespace arm_compute;
-
-void CLPhase::configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, PhaseType phase_type)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output, phase_type);
-}
-
-void CLPhase::configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, PhaseType phase_type)
-{
- auto k = std::make_unique<CLMagnitudePhaseKernel>();
- k->configure(compile_context, input1, input2, nullptr, output, MagnitudeType::L1NORM, phase_type);
- _kernel = std::move(k);
-}
diff --git a/src/runtime/CL/functions/CLScharr3x3.cpp b/src/runtime/CL/functions/CLScharr3x3.cpp
deleted file mode 100644
index 563ec19266..0000000000
--- a/src/runtime/CL/functions/CLScharr3x3.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLScharr3x3.h"
-
-#include "arm_compute/core/PixelValue.h"
-#include "src/core/CL/kernels/CLFillBorderKernel.h"
-#include "src/core/CL/kernels/CLScharr3x3Kernel.h"
-
-#include <utility>
-
-using namespace arm_compute;
-
-void CLScharr3x3::configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output_x, output_y, border_mode, constant_border_value);
-}
-
-void CLScharr3x3::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value)
-{
- auto k = std::make_unique<CLScharr3x3Kernel>();
- k->configure(compile_context, input, output_x, output_y, border_mode == BorderMode::UNDEFINED);
- _kernel = std::move(k);
- _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
-}
diff --git a/src/runtime/CL/functions/CLSobel3x3.cpp b/src/runtime/CL/functions/CLSobel3x3.cpp
deleted file mode 100644
index 6724c12a72..0000000000
--- a/src/runtime/CL/functions/CLSobel3x3.cpp
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLSobel3x3.h"
-
-#include "arm_compute/core/PixelValue.h"
-#include "src/core/CL/kernels/CLFillBorderKernel.h"
-#include "src/core/CL/kernels/CLSobel3x3Kernel.h"
-
-#include <utility>
-
-using namespace arm_compute;
-
-CLSobel3x3::~CLSobel3x3() = default;
-
-void CLSobel3x3::configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output_x, output_y, border_mode, constant_border_value);
-}
-
-void CLSobel3x3::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value)
-{
- auto k = std::make_unique<CLSobel3x3Kernel>();
- k->configure(compile_context, input, output_x, output_y, border_mode == BorderMode::UNDEFINED);
- _kernel = std::move(k);
- _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
-}
diff --git a/src/runtime/CL/functions/CLSobel5x5.cpp b/src/runtime/CL/functions/CLSobel5x5.cpp
deleted file mode 100644
index 98f215794c..0000000000
--- a/src/runtime/CL/functions/CLSobel5x5.cpp
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLSobel5x5.h"
-
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/PixelValue.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "arm_compute/runtime/ITensorAllocator.h"
-#include "src/core/CL/kernels/CLFillBorderKernel.h"
-#include "src/core/CL/kernels/CLSobel5x5Kernel.h"
-
-using namespace arm_compute;
-
-CLSobel5x5::CLSobel5x5(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)),
- _sobel_hor(std::make_unique<CLSobel5x5HorKernel>()),
- _sobel_vert(std::make_unique<CLSobel5x5VertKernel>()),
- _border_handler(std::make_unique<CLFillBorderKernel>()),
- _tmp_x(),
- _tmp_y()
-{
-}
-
-CLSobel5x5::~CLSobel5x5() = default;
-
-void CLSobel5x5::configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output_x, output_y, border_mode, constant_border_value);
-}
-
-void CLSobel5x5::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
-
- const bool run_sobel_x = output_x != nullptr;
- const bool run_sobel_y = output_y != nullptr;
-
- TensorInfo tensor_info(input->info()->tensor_shape(), 1, DataType::S16);
-
- if(run_sobel_x && run_sobel_y)
- {
- _tmp_x.allocator()->init(tensor_info);
- _tmp_y.allocator()->init(tensor_info);
- _memory_group.manage(&_tmp_x);
- _memory_group.manage(&_tmp_y);
- _sobel_hor->configure(compile_context, input, &_tmp_x, &_tmp_y, border_mode == BorderMode::UNDEFINED);
- _sobel_vert->configure(compile_context, &_tmp_x, &_tmp_y, output_x, output_y, border_mode == BorderMode::UNDEFINED);
- _tmp_x.allocator()->allocate();
- _tmp_y.allocator()->allocate();
- }
- else if(run_sobel_x)
- {
- _tmp_x.allocator()->init(tensor_info);
- _memory_group.manage(&_tmp_x);
- _sobel_hor->configure(compile_context, input, &_tmp_x, nullptr, border_mode == BorderMode::UNDEFINED);
- _sobel_vert->configure(compile_context, &_tmp_x, nullptr, output_x, nullptr, border_mode == BorderMode::UNDEFINED);
- _tmp_x.allocator()->allocate();
- }
- else if(run_sobel_y)
- {
- _tmp_y.allocator()->init(tensor_info);
- _memory_group.manage(&_tmp_y);
- _sobel_hor->configure(compile_context, input, nullptr, &_tmp_y, border_mode == BorderMode::UNDEFINED);
- _sobel_vert->configure(compile_context, nullptr, &_tmp_y, nullptr, output_y, border_mode == BorderMode::UNDEFINED);
- _tmp_y.allocator()->allocate();
- }
- _border_handler->configure(compile_context, input, _sobel_hor->border_size(), border_mode, PixelValue(constant_border_value));
-}
-
-void CLSobel5x5::run()
-{
- CLScheduler::get().enqueue(*_border_handler, false);
-
- MemoryGroupResourceScope scope_mg(_memory_group);
-
- CLScheduler::get().enqueue(*_sobel_hor, false);
- CLScheduler::get().enqueue(*_sobel_vert);
-}
diff --git a/src/runtime/CL/functions/CLSobel7x7.cpp b/src/runtime/CL/functions/CLSobel7x7.cpp
deleted file mode 100644
index a3d63f98dd..0000000000
--- a/src/runtime/CL/functions/CLSobel7x7.cpp
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLSobel7x7.h"
-
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/PixelValue.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "arm_compute/runtime/ITensorAllocator.h"
-#include "src/core/CL/kernels/CLFillBorderKernel.h"
-#include "src/core/CL/kernels/CLSobel7x7Kernel.h"
-
-using namespace arm_compute;
-
-CLSobel7x7::CLSobel7x7(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)),
- _sobel_hor(std::make_unique<CLSobel7x7HorKernel>()),
- _sobel_vert(std::make_unique<CLSobel7x7VertKernel>()),
- _border_handler(std::make_unique<CLFillBorderKernel>()),
- _tmp_x(),
- _tmp_y()
-{
-}
-
-CLSobel7x7::~CLSobel7x7() = default;
-
-void CLSobel7x7::configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output_x, output_y, border_mode, constant_border_value);
-}
-
-void CLSobel7x7::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
-
- const bool run_sobel_x = output_x != nullptr;
- const bool run_sobel_y = output_y != nullptr;
-
- TensorInfo tensor_info(input->info()->tensor_shape(), 1, DataType::S32);
-
- if(run_sobel_x && run_sobel_y)
- {
- _tmp_x.allocator()->init(tensor_info);
- _tmp_y.allocator()->init(tensor_info);
- _memory_group.manage(&_tmp_x);
- _memory_group.manage(&_tmp_y);
- _sobel_hor->configure(compile_context, input, &_tmp_x, &_tmp_y, border_mode == BorderMode::UNDEFINED);
- _sobel_vert->configure(compile_context, &_tmp_x, &_tmp_y, output_x, output_y, border_mode == BorderMode::UNDEFINED);
- _tmp_x.allocator()->allocate();
- _tmp_y.allocator()->allocate();
- }
- else if(run_sobel_x)
- {
- _tmp_x.allocator()->init(tensor_info);
- _memory_group.manage(&_tmp_x);
- _sobel_hor->configure(compile_context, input, &_tmp_x, nullptr, border_mode == BorderMode::UNDEFINED);
- _sobel_vert->configure(compile_context, &_tmp_x, nullptr, output_x, nullptr, border_mode == BorderMode::UNDEFINED);
- _tmp_x.allocator()->allocate();
- }
- else if(run_sobel_y)
- {
- _tmp_y.allocator()->init(tensor_info);
- _memory_group.manage(&_tmp_y);
- _sobel_hor->configure(compile_context, input, nullptr, &_tmp_y, border_mode == BorderMode::UNDEFINED);
- _sobel_vert->configure(compile_context, nullptr, &_tmp_y, nullptr, output_y, border_mode == BorderMode::UNDEFINED);
- _tmp_y.allocator()->allocate();
- }
- _border_handler->configure(compile_context, input, _sobel_hor->border_size(), border_mode, PixelValue(constant_border_value));
-}
-
-void CLSobel7x7::run()
-{
- CLScheduler::get().enqueue(*_border_handler, false);
-
- MemoryGroupResourceScope scope_mg(_memory_group);
-
- CLScheduler::get().enqueue(*_sobel_hor, false);
- CLScheduler::get().enqueue(*_sobel_vert);
-}
diff --git a/src/runtime/CL/functions/CLTableLookup.cpp b/src/runtime/CL/functions/CLTableLookup.cpp
deleted file mode 100644
index a4671f51bd..0000000000
--- a/src/runtime/CL/functions/CLTableLookup.cpp
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLTableLookup.h"
-
-#include "src/core/CL/kernels/CLTableLookupKernel.h"
-
-#include <utility>
-
-using namespace arm_compute;
-
-void CLTableLookup::configure(const ICLTensor *input, const ICLLut *lut, ICLTensor *output)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, lut, output);
-}
-
-void CLTableLookup::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLLut *lut, ICLTensor *output)
-{
- auto k = std::make_unique<CLTableLookupKernel>();
- k->configure(compile_context, input, lut, output);
- _kernel = std::move(k);
-}
diff --git a/src/runtime/CL/functions/CLThreshold.cpp b/src/runtime/CL/functions/CLThreshold.cpp
deleted file mode 100644
index 70bc3b9365..0000000000
--- a/src/runtime/CL/functions/CLThreshold.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (c) 2016-2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLThreshold.h"
-
-#include "src/core/CL/kernels/CLThresholdKernel.h"
-
-#include <utility>
-
-namespace arm_compute
-{
-void CLThreshold::configure(const ICLTensor *input, ICLTensor *output, const ThresholdKernelInfo &info)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output, info);
-}
-
-void CLThreshold::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ThresholdKernelInfo &info)
-{
- auto k = std::make_unique<CLThresholdKernel>();
- k->configure(compile_context, input, output, info);
- _kernel = std::move(k);
-}
-} // namespace arm_compute
diff --git a/src/runtime/CL/functions/CLWarpAffine.cpp b/src/runtime/CL/functions/CLWarpAffine.cpp
deleted file mode 100644
index 9a22446cf6..0000000000
--- a/src/runtime/CL/functions/CLWarpAffine.cpp
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLWarpAffine.h"
-
-#include "arm_compute/core/PixelValue.h"
-#include "src/core/CL/kernels/CLFillBorderKernel.h"
-#include "src/core/CL/kernels/CLWarpAffineKernel.h"
-
-#include <utility>
-
-using namespace arm_compute;
-
-void CLWarpAffine::configure(ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output, matrix, policy, border_mode, constant_border_value);
-}
-
-void CLWarpAffine::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy, BorderMode border_mode,
- uint8_t constant_border_value)
-{
- auto k = std::make_unique<CLWarpAffineKernel>();
- k->configure(compile_context, input, output, matrix, policy);
- _kernel = std::move(k);
- _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
-}
diff --git a/src/runtime/CL/functions/CLWarpPerspective.cpp b/src/runtime/CL/functions/CLWarpPerspective.cpp
deleted file mode 100644
index 0ec6b42e75..0000000000
--- a/src/runtime/CL/functions/CLWarpPerspective.cpp
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLWarpPerspective.h"
-
-#include "arm_compute/core/PixelValue.h"
-#include "src/core/CL/kernels/CLFillBorderKernel.h"
-#include "src/core/CL/kernels/CLWarpPerspectiveKernel.h"
-
-#include <utility>
-
-using namespace arm_compute;
-
-void CLWarpPerspective::configure(ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output, matrix, policy, border_mode, constant_border_value);
-}
-
-void CLWarpPerspective::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy, BorderMode border_mode,
- uint8_t constant_border_value)
-{
- auto k = std::make_unique<CLWarpPerspectiveKernel>();
- k->configure(compile_context, input, output, matrix, policy);
- _kernel = std::move(k);
- _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
-}
diff --git a/src/runtime/NEON/functions/NEConvolution.cpp b/src/runtime/NEON/functions/NEConvolution.cpp
deleted file mode 100644
index ad62a2254a..0000000000
--- a/src/runtime/NEON/functions/NEConvolution.cpp
+++ /dev/null
@@ -1,149 +0,0 @@
-/*
- * Copyright (c) 2016-2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/NEON/functions/NEConvolution.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/PixelValue.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/runtime/NEON/NEScheduler.h"
-#include "arm_compute/runtime/TensorAllocator.h"
-#include "src/core/NEON/kernels/NEConvolutionKernel.h"
-#include "src/core/NEON/kernels/NEConvolutionKernel.h"
-#include "src/core/NEON/kernels/NEFillBorderKernel.h"
-
-#include <array>
-#include <utility>
-
-namespace arm_compute
-{
-NEConvolution3x3::~NEConvolution3x3() = default;
-
-void NEConvolution3x3::configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value)
-{
- auto k = std::make_unique<NEConvolution3x3Kernel>();
- k->configure(input, output, conv, scale, border_mode == BorderMode::UNDEFINED);
- _kernel = std::move(k);
-
- auto b = std::make_unique<NEFillBorderKernel>();
- b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
- _border_handler = std::move(b);
-}
-
-template <unsigned int matrix_size>
-NEConvolutionSquare<matrix_size>::~NEConvolutionSquare() = default;
-
-template <unsigned int matrix_size>
-NEConvolutionSquare<matrix_size>::NEConvolutionSquare(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _tmp(), _is_separable(false), _kernel_hor(), _kernel_vert(), _kernel(), _border_handler()
-{
-}
-
-template <unsigned int matrix_size>
-void NEConvolutionSquare<matrix_size>::configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode,
- uint8_t constant_border_value)
-{
- ARM_COMPUTE_ERROR_ON(conv == nullptr);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16);
-
- std::array<int16_t, matrix_size> conv_col{ { 0 } };
- std::array<int16_t, matrix_size> conv_row{ { 0 } };
-
- _is_separable = separate_matrix(conv, conv_col.data(), conv_row.data(), matrix_size);
-
- auto b = std::make_unique<NEFillBorderKernel>();
- if(_is_separable)
- {
- DataType intermediate_type = DataType::UNKNOWN;
- std::tie(std::ignore, intermediate_type) = data_type_for_convolution(conv_col.data(), conv_row.data(), matrix_size);
-
- _tmp.allocator()->init(TensorInfo(input->info()->tensor_shape(), 1, intermediate_type));
-
- // Manage intermediate buffers
- _memory_group.manage(&_tmp);
-
- // Calculate scale
- if(scale == 0)
- {
- scale = calculate_matrix_scale(conv, matrix_size);
- }
-
- _kernel_hor = std::make_unique<NESeparableConvolutionHorKernel<matrix_size>>();
- _kernel_vert = std::make_unique<NESeparableConvolutionVertKernel<matrix_size>>();
-
- _kernel_hor->configure(input, &_tmp, conv_row.data(), border_mode == BorderMode::UNDEFINED);
- _kernel_vert->configure(&_tmp, output, conv_col.data(), scale, border_mode == BorderMode::UNDEFINED);
-
- _tmp.allocator()->allocate();
-
- b->configure(input, _kernel_hor->border_size(), border_mode, PixelValue(constant_border_value));
- }
- else
- {
- _kernel = std::make_unique<NEConvolutionKernel<matrix_size>>();
- _kernel->configure(input, output, conv, scale, border_mode == BorderMode::UNDEFINED);
- b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
- }
- _border_handler = std::move(b);
-}
-
-template <unsigned int matrix_size>
-void NEConvolutionSquare<matrix_size>::run()
-{
- NEScheduler::get().schedule(_border_handler.get(), Window::DimZ);
-
- if(_is_separable)
- {
- MemoryGroupResourceScope scope_mg(_memory_group);
-
- NEScheduler::get().schedule(_kernel_hor.get(), Window::DimY);
- NEScheduler::get().schedule(_kernel_vert.get(), Window::DimY);
- }
- else
- {
- NEScheduler::get().schedule(_kernel.get(), Window::DimY);
- }
-}
-
-template class arm_compute::NEConvolutionSquare<5>;
-template class arm_compute::NEConvolutionSquare<7>;
-template class arm_compute::NEConvolutionSquare<9>;
-
-NEConvolutionRectangle::~NEConvolutionRectangle() = default;
-
-void NEConvolutionRectangle::configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t rows, uint32_t cols, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value)
-{
- border_mode = (border_mode == BorderMode::UNDEFINED) ? BorderMode::CONSTANT : border_mode;
- auto k = std::make_unique<NEConvolutionRectangleKernel>();
- k->configure(input, output, conv, rows, cols, scale, false);
- _kernel = std::move(k);
-
- auto b = std::make_unique<NEFillBorderKernel>();
- b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
- _border_handler = std::move(b);
-}
-} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NENonMaximaSuppression3x3.cpp b/src/runtime/NEON/functions/NERemap.cpp
index a34be71ea0..a55f7bc218 100644
--- a/src/runtime/NEON/functions/NENonMaximaSuppression3x3.cpp
+++ b/src/runtime/NEON/functions/NERemap.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,30 +21,35 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h"
+#include "arm_compute/runtime/NEON/functions/NERemap.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/runtime/TensorAllocator.h"
#include "src/core/NEON/kernels/NEFillBorderKernel.h"
-#include "src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h"
+#include "src/core/NEON/kernels/NERemapKernel.h"
#include <utility>
namespace arm_compute
{
-void NENonMaximaSuppression3x3::configure(ITensor *input, ITensor *output, BorderMode border_mode)
+void NERemap::configure(ITensor *input, const ITensor *map_x, const ITensor *map_y, ITensor *output, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value)
{
- auto k = std::make_unique<NENonMaximaSuppression3x3Kernel>();
- k->configure(input, output, border_mode == BorderMode::UNDEFINED);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(map_x, 1, DataType::F32);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(map_y, 1, DataType::F32);
+ ARM_COMPUTE_ERROR_ON_MSG(policy == InterpolationPolicy::AREA, "Area interpolation is not supported");
+
+ auto k = std::make_unique<NERemapKernel>();
+ k->configure(input, map_x, map_y, output, policy);
_kernel = std::move(k);
auto b = std::make_unique<NEFillBorderKernel>();
- if(border_mode != BorderMode::UNDEFINED)
- {
- b->configure(input, BorderSize(1), BorderMode::CONSTANT, static_cast<float>(0.f));
- }
- else
- {
- b->configure(input, BorderSize(1), BorderMode::UNDEFINED, static_cast<float>(0.f));
- }
+ b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
_border_handler = std::move(b);
}
-} // namespace arm_compute \ No newline at end of file
+} // namespace arm_compute
diff --git a/tests/validation/CL/AbsoluteDifference.cpp b/tests/validation/CL/AbsoluteDifference.cpp
deleted file mode 100644
index f3eb129118..0000000000
--- a/tests/validation/CL/AbsoluteDifference.cpp
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "arm_compute/runtime/CL/functions/CLAbsoluteDifference.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/ConvertPolicyDataset.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/AbsoluteDifferenceFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-/** Input data sets **/
-const auto AbsoluteDifferenceU8Dataset = combine(combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U8)), framework::dataset::make("DataType",
- DataType::U8));
-const auto AbsoluteDifferenceS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::U8, DataType::S16 }), framework::dataset::make("DataType", DataType::S16)),
- framework::dataset::make("DataType", DataType::S16));
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(AbsoluteDifference)
-
-template <typename T>
-using CLAbsoluteDifferenceFixture = AbsoluteDifferenceValidationFixture<CLTensor, CLAccessor, CLAbsoluteDifference, T>;
-
-TEST_SUITE(U8)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLAbsoluteDifferenceFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::SmallShapes(), AbsoluteDifferenceU8Dataset))
-{
- // Validate output
- validate(CLAccessor(_target), _reference);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLAbsoluteDifferenceFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), AbsoluteDifferenceU8Dataset))
-{
- // Validate output
- validate(CLAccessor(_target), _reference);
-}
-TEST_SUITE_END() // U8
-
-TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLAbsoluteDifferenceFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(datasets::SmallShapes(), AbsoluteDifferenceS16Dataset))
-{
- // Validate output
- validate(CLAccessor(_target), _reference);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLAbsoluteDifferenceFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), AbsoluteDifferenceS16Dataset))
-{
- // Validate output
- validate(CLAccessor(_target), _reference);
-}
-TEST_SUITE_END() // S16
-
-TEST_SUITE_END() // AbsoluteDifference
-TEST_SUITE_END() // CL
-
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/Accumulate.cpp b/tests/validation/CL/Accumulate.cpp
deleted file mode 100644
index 8f5c6d5deb..0000000000
--- a/tests/validation/CL/Accumulate.cpp
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "arm_compute/runtime/CL/functions/CLAccumulate.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/ConvertPolicyDataset.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/AccumulateFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-/** Tolerance value for comparing reference's output against implementation's output for floating point data types */
-constexpr AbsoluteTolerance<float> tolerance(1.0f);
-/** Input data sets **/
-const auto AccumulateU8Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U8));
-const auto AccumulateS16Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S16));
-} // namespace
-TEST_SUITE(CL)
-TEST_SUITE(Accumulate)
-
-TEST_SUITE(U8)
-template <typename T1>
-using CLAccumulateFixture = AccumulateValidationFixture<CLTensor, CLAccessor, CLAccumulate, T1, int16_t>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLAccumulateFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::SmallShapes(), AccumulateS16Dataset))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, tolerance);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLAccumulateFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), AccumulateS16Dataset))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, tolerance);
-}
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-
-TEST_SUITE(AccumulateWeighted)
-
-TEST_SUITE(U8)
-template <typename T1>
-using CLAccumulateWeightedFixture = AccumulateWeightedValidationFixture<CLTensor, CLAccessor, CLAccumulateWeighted, T1, uint8_t>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLAccumulateWeightedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::SmallShapes(), AccumulateU8Dataset))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, tolerance);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLAccumulateWeightedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), AccumulateU8Dataset))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, tolerance);
-}
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-
-TEST_SUITE(AccumulateSquared)
-
-TEST_SUITE(U8)
-template <typename T1>
-using CLAccumulateSquaredFixture = AccumulateSquaredValidationFixture<CLTensor, CLAccessor, CLAccumulateSquared, T1, int16_t>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLAccumulateSquaredFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::SmallShapes(), AccumulateS16Dataset))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, tolerance);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLAccumulateSquaredFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), AccumulateS16Dataset))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, tolerance);
-}
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/Box3x3.cpp b/tests/validation/CL/Box3x3.cpp
deleted file mode 100644
index 6fd531b798..0000000000
--- a/tests/validation/CL/Box3x3.cpp
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/functions/CLBox3x3.h"
-#include "arm_compute/runtime/Tensor.h"
-#include "arm_compute/runtime/TensorAllocator.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/BorderModeDataset.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/Box3x3Fixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-constexpr unsigned int filter_size = 3; /* Size of the kernel/filter in number of elements. */
-constexpr BorderSize border_size(filter_size / 2); /* Border size of the kernel/filter around its central element. */
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(Box3x3)
-
-template <typename T>
-using CLBox3x3Fixture = Box3x3ValidationFixture<CLTensor, CLAccessor, CLBox3x3, T>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLBox3x3Fixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
- DataType::U8)),
- datasets::BorderModes()))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), border_size));
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLBox3x3Fixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
- DataType::U8)),
- datasets::BorderModes()))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), border_size));
-}
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/CannyEdge.cpp b/tests/validation/CL/CannyEdge.cpp
deleted file mode 100644
index c127eaca84..0000000000
--- a/tests/validation/CL/CannyEdge.cpp
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "arm_compute/runtime/CL/functions/CLCannyEdge.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/CL/CLArrayAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/BorderModeDataset.h"
-#include "tests/datasets/ImageFileDatasets.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/CannyEdgeFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-/* Allowed ratio of mismatches between target and reference (1.0 = 100%) */
-const float allowed_mismatch_ratio = 0.1f;
-
-const auto data = combine(framework::dataset::make("GradientSize",
-{ 3, 5, 7 }),
-combine(framework::dataset::make("Normalization", { MagnitudeType::L1NORM, MagnitudeType::L2NORM }), datasets::BorderModes()));
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(CannyEdge)
-
-template <typename T>
-using CLCannyEdgeFixture = CannyEdgeValidationFixture<CLTensor, CLAccessor, CLKeyPointArray, CLCannyEdge, T>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLCannyEdgeFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SmallImageFiles(), data), framework::dataset::make("Format", Format::U8)))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, AbsoluteTolerance<uint8_t>(0), allowed_mismatch_ratio);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLCannyEdgeFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeImageFiles(), data), framework::dataset::make("Format", Format::U8)))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, AbsoluteTolerance<uint8_t>(0), allowed_mismatch_ratio);
-}
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/ChannelCombine.cpp b/tests/validation/CL/ChannelCombine.cpp
deleted file mode 100644
index 2ed0765eb9..0000000000
--- a/tests/validation/CL/ChannelCombine.cpp
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLMultiImage.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "arm_compute/runtime/CL/functions/CLChannelCombine.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/ConvertPolicyDataset.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/ChannelCombineFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-TEST_SUITE(CL)
-TEST_SUITE(ChannelCombine)
-
-template <typename T>
-using CLChannelCombineFixture = ChannelCombineValidationFixture<CLMultiImage, CLTensor, CLAccessor, CLChannelCombine, T>;
-
-TEST_SUITE(RGBA)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLChannelCombineFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), framework::dataset::make("FormatType", { Format::RGB888, Format::RGBA8888 })))
-{
- // Validate output
- for(unsigned int plane_idx = 0; plane_idx < _num_planes; ++plane_idx)
- {
- validate(CLAccessor(*_target.cl_plane(plane_idx)), _reference[plane_idx]);
- }
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLChannelCombineFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), framework::dataset::make("FormatType", { Format::RGB888, Format::RGBA8888 })))
-{
- // Validate output
- for(unsigned int plane_idx = 0; plane_idx < _num_planes; ++plane_idx)
- {
- validate(CLAccessor(*_target.cl_plane(plane_idx)), _reference[plane_idx]);
- }
-}
-TEST_SUITE_END() // RGBA
-
-TEST_SUITE(YUV)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLChannelCombineFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), framework::dataset::make("FormatType", { Format::YUYV422, Format::UYVY422 })))
-{
- // Validate output
- for(unsigned int plane_idx = 0; plane_idx < _num_planes; ++plane_idx)
- {
- validate(CLAccessor(*_target.cl_plane(plane_idx)), _reference[plane_idx]);
- }
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLChannelCombineFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), framework::dataset::make("FormatType", { Format::YUYV422, Format::UYVY422 })))
-{
- // Validate output
- for(unsigned int plane_idx = 0; plane_idx < _num_planes; ++plane_idx)
- {
- validate(CLAccessor(*_target.cl_plane(plane_idx)), _reference[plane_idx]);
- }
-}
-TEST_SUITE_END() // YUV
-
-TEST_SUITE(YUVPlanar)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLChannelCombineFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), framework::dataset::make("FormatType", { Format::NV12, Format::NV21, Format::IYUV, Format::YUV444 })))
-{
- // Validate output
- for(unsigned int plane_idx = 0; plane_idx < _num_planes; ++plane_idx)
- {
- validate(CLAccessor(*_target.cl_plane(plane_idx)), _reference[plane_idx]);
- }
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLChannelCombineFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), framework::dataset::make("FormatType", { Format::NV12, Format::NV21, Format::IYUV, Format::YUV444 })))
-{
- // Validate output
- for(unsigned int plane_idx = 0; plane_idx < _num_planes; ++plane_idx)
- {
- validate(CLAccessor(*_target.cl_plane(plane_idx)), _reference[plane_idx]);
- }
-}
-TEST_SUITE_END() // YUVPlanar
-
-TEST_SUITE_END() // ChannelCombine
-TEST_SUITE_END() // CL
-
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/ChannelExtract.cpp b/tests/validation/CL/ChannelExtract.cpp
deleted file mode 100644
index b02741f1de..0000000000
--- a/tests/validation/CL/ChannelExtract.cpp
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLMultiImage.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "arm_compute/runtime/CL/functions/CLChannelExtract.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/ConvertPolicyDataset.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/ChannelExtractFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-// Input data sets
-const auto ChannelExtractRGBADataset = combine(framework::dataset::make("FormatType", { Format::RGBA8888 }),
- framework::dataset::make("ChannelType", { Channel::R, Channel::G, Channel::B, Channel::A }));
-const auto ChannelExtractYUVDataset = combine(framework::dataset::make("FormatType", { Format::YUYV422, Format::UYVY422 }),
- framework::dataset::make("ChannelType", { Channel::Y, Channel::U, Channel::V }));
-const auto ChannelExtractYUVPlanarDataset = combine(framework::dataset::make("FormatType", { Format::IYUV, Format::YUV444, Format::NV12, Format::NV21 }),
- framework::dataset::make("ChannelType", { Channel::Y, Channel::U, Channel::V }));
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(ChannelExtract)
-
-template <typename T>
-using CLChannelExtractFixture = ChannelExtractValidationFixture<CLMultiImage, CLTensor, CLAccessor, CLChannelExtract, T>;
-
-TEST_SUITE(RGBA)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLChannelExtractFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), ChannelExtractRGBADataset))
-{
- // Validate output
- validate(CLAccessor(_target), _reference);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLChannelExtractFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), ChannelExtractRGBADataset))
-{
- // Validate output
- validate(CLAccessor(_target), _reference);
-}
-TEST_SUITE_END() // RGBA
-
-TEST_SUITE(YUV)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLChannelExtractFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), ChannelExtractYUVDataset))
-{
- // Validate output
- validate(CLAccessor(_target), _reference);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLChannelExtractFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), ChannelExtractYUVDataset))
-{
- // Validate output
- validate(CLAccessor(_target), _reference);
-}
-TEST_SUITE_END() // YUV
-
-TEST_SUITE(YUVPlanar)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLChannelExtractFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), ChannelExtractYUVPlanarDataset))
-{
- // Validate output
- validate(CLAccessor(_target), _reference);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLChannelExtractFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), ChannelExtractYUVPlanarDataset))
-{
- // Validate output
- validate(CLAccessor(_target), _reference);
-}
-TEST_SUITE_END() // YUVPlanar
-
-TEST_SUITE_END() // ChannelExtract
-TEST_SUITE_END() // CL
-
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/ColorConvert.cpp b/tests/validation/CL/ColorConvert.cpp
deleted file mode 100644
index 0d672a09ff..0000000000
--- a/tests/validation/CL/ColorConvert.cpp
+++ /dev/null
@@ -1,241 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/CLMultiImage.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "arm_compute/runtime/CL/functions/CLColorConvert.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/ColorConvertFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-constexpr AbsoluteTolerance<uint8_t> tolerance_nv(2);
-constexpr AbsoluteTolerance<uint8_t> tolerance_u8(1);
-
-// Input data sets
-const auto RGBDataset = framework::dataset::make("FormatType", { Format::RGB888, Format::RGBA8888 });
-const auto YUYVDataset = framework::dataset::make("FormatType", { Format::YUYV422, Format::UYVY422 });
-
-const auto ColorConvert_RGBA_to_RGB = combine(framework::dataset::make("FormatType", { Format::RGBA8888 }),
- framework::dataset::make("FormatType", { Format::RGB888 }));
-
-const auto ColorConvert_RGB_to_RGBA = combine(framework::dataset::make("FormatType", { Format::RGB888 }),
- framework::dataset::make("FormatType", { Format::RGBA8888 }));
-
-const auto ColorConvert_RGB_to_U8 = combine(framework::dataset::make("FormatType", { Format::RGB888 }),
- framework::dataset::make("FormatType", { Format::U8 }));
-
-const auto ColorConvert_YUYV_to_RGBDataset = combine(YUYVDataset,
- RGBDataset);
-
-const auto ColorConvert_YUVPlanar_to_RGBDataset = combine(framework::dataset::make("FormatType", { Format::IYUV, Format::NV12, Format::NV21 }),
- RGBDataset);
-
-const auto ColorConvert_RGBDataset_to_NVDataset = combine(RGBDataset,
- framework::dataset::make("FormatType", { Format::NV12, Format::IYUV, Format::YUV444 }));
-
-const auto ColorConvert_YUYVDataset_to_NVDataset = combine(YUYVDataset,
- framework::dataset::make("FormatType", { Format::NV12, Format::IYUV }));
-
-const auto ColorConvert_NVDataset_to_YUVDataset = combine(framework::dataset::make("FormatType", { Format::NV12, Format::NV21 }),
- framework::dataset::make("FormatType", { Format::IYUV, Format::YUV444 }));
-
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(ColorConvert)
-
-template <typename T>
-using CLColorConvertFixture = ColorConvertValidationFixture<CLMultiImage, CLTensor, CLAccessor, CLColorConvert, T>;
-
-TEST_SUITE(RGBA)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLColorConvertFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), ColorConvert_RGBA_to_RGB))
-{
- // Validate output
- for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx)
- {
- validate(CLAccessor(*_target.cl_plane(plane_idx)), _reference[plane_idx]);
- }
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLColorConvertFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), ColorConvert_RGBA_to_RGB))
-{
- // Validate output
- for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx)
- {
- validate(CLAccessor(*_target.cl_plane(plane_idx)), _reference[plane_idx]);
- }
-}
-TEST_SUITE_END()
-
-TEST_SUITE(RGB)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLColorConvertFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), ColorConvert_RGB_to_RGBA))
-{
- // Validate output
- for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx)
- {
- validate(CLAccessor(*_target.cl_plane(plane_idx)), _reference[plane_idx]);
- }
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLColorConvertFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), ColorConvert_RGB_to_RGBA))
-{
- // Validate output
- for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx)
- {
- validate(CLAccessor(*_target.cl_plane(plane_idx)), _reference[plane_idx]);
- }
-}
-TEST_SUITE_END()
-
-TEST_SUITE(RGBtoU8)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLColorConvertFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), ColorConvert_RGB_to_U8))
-{
- // Validate output
- for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx)
- {
- validate(CLAccessor(*_target.cl_plane(plane_idx)), _reference[plane_idx], tolerance_u8);
- }
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLColorConvertFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), ColorConvert_RGB_to_U8))
-{
- // Validate output
- for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx)
- {
- validate(CLAccessor(*_target.cl_plane(plane_idx)), _reference[plane_idx], tolerance_u8);
- }
-}
-TEST_SUITE_END()
-
-TEST_SUITE(YUV)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLColorConvertFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), ColorConvert_YUYV_to_RGBDataset))
-{
- // Validate output
- for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx)
- {
- validate(CLAccessor(*_target.cl_plane(plane_idx)), _reference[plane_idx]);
- }
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLColorConvertFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), ColorConvert_YUYV_to_RGBDataset))
-{
- // Validate output
- for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx)
- {
- validate(CLAccessor(*_target.cl_plane(plane_idx)), _reference[plane_idx]);
- }
-}
-TEST_SUITE_END()
-
-TEST_SUITE(YUVPlanar)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLColorConvertFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), ColorConvert_YUVPlanar_to_RGBDataset))
-{
- // Validate output
- for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx)
- {
- validate(CLAccessor(*_target.cl_plane(plane_idx)), _reference[plane_idx]);
- }
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLColorConvertFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), ColorConvert_YUVPlanar_to_RGBDataset))
-{
- // Validate output
- for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx)
- {
- validate(CLAccessor(*_target.cl_plane(plane_idx)), _reference[plane_idx]);
- }
-}
-TEST_SUITE_END()
-
-TEST_SUITE(NV)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLColorConvertFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), ColorConvert_RGBDataset_to_NVDataset))
-{
- // Validate output
- for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx)
- {
- validate(CLAccessor(*_target.cl_plane(plane_idx)), _reference[plane_idx], tolerance_nv);
- }
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLColorConvertFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), ColorConvert_RGBDataset_to_NVDataset))
-{
- // Validate output
- for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx)
- {
- validate(CLAccessor(*_target.cl_plane(plane_idx)), _reference[plane_idx], tolerance_nv);
- }
-}
-TEST_SUITE_END()
-
-TEST_SUITE(YUYVtoNV)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLColorConvertFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), ColorConvert_YUYVDataset_to_NVDataset))
-{
- // Validate output
- for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx)
- {
- validate(CLAccessor(*_target.cl_plane(plane_idx)), _reference[plane_idx]);
- }
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLColorConvertFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), ColorConvert_YUYVDataset_to_NVDataset))
-{
- // Validate output
- for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx)
- {
- validate(CLAccessor(*_target.cl_plane(plane_idx)), _reference[plane_idx]);
- }
-}
-
-TEST_SUITE_END()
-
-TEST_SUITE(NVtoYUV)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLColorConvertFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), ColorConvert_NVDataset_to_YUVDataset))
-{
- // Validate output
- for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx)
- {
- validate(CLAccessor(*_target.cl_plane(plane_idx)), _reference[plane_idx]);
- }
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLColorConvertFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), ColorConvert_NVDataset_to_YUVDataset))
-{
- // Validate output
- for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx)
- {
- validate(CLAccessor(*_target.cl_plane(plane_idx)), _reference[plane_idx]);
- }
-}
-
-TEST_SUITE_END()
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/Convolution.cpp b/tests/validation/CL/Convolution.cpp
deleted file mode 100644
index 1608e7c66d..0000000000
--- a/tests/validation/CL/Convolution.cpp
+++ /dev/null
@@ -1,285 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/functions/CLConvolution.h"
-#include "arm_compute/runtime/Tensor.h"
-#include "arm_compute/runtime/TensorAllocator.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/BorderModeDataset.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/ConvolutionFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-TEST_SUITE(CL)
-TEST_SUITE(CustomConvolution)
-TEST_SUITE(Square3x3)
-template <typename T>
-using CLConvolutionFixture = ConvolutionSquareValidationFixture<CLTensor, CLAccessor, CLConvolution3x3, T>;
-
-TEST_SUITE(U8)
-FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()),
- framework::dataset::make("DataType",
- DataType::U8)),
- datasets::BorderModes()),
- framework::dataset::make("filter_size", { 3 })))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
-}
-TEST_SUITE_END() // U8
-
-TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()),
- framework::dataset::make("DataType",
- DataType::S16)),
- datasets::BorderModes()),
- framework::dataset::make("filter_size", { 3 })))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
-}
-TEST_SUITE_END() // S16
-TEST_SUITE_END() // Square 3x3
-
-TEST_SUITE(Square5x5)
-template <typename T>
-using CLConvolutionFixture = ConvolutionSquareValidationFixture<CLTensor, CLAccessor, CLConvolution5x5, T>;
-
-TEST_SUITE(U8)
-FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()),
- framework::dataset::make("DataType",
- DataType::U8)),
- datasets::BorderModes()),
- framework::dataset::make("filter_size", { 5 })))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
-}
-TEST_SUITE_END() // U8
-
-TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()),
- framework::dataset::make("DataType",
- DataType::S16)),
- datasets::BorderModes()),
- framework::dataset::make("filter_size", { 5 })))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
-}
-TEST_SUITE_END() // S16
-TEST_SUITE_END() // Square5x5
-
-TEST_SUITE(Square7x7)
-template <typename T>
-using CLConvolutionFixture = ConvolutionSquareValidationFixture<CLTensor, CLAccessor, CLConvolution7x7, T>;
-
-TEST_SUITE(U8)
-FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()),
- framework::dataset::make("DataType",
- DataType::U8)),
- datasets::BorderModes()),
- framework::dataset::make("filter_size", { 7 })))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
-}
-TEST_SUITE_END() // U8
-
-TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()),
- framework::dataset::make("DataType",
- DataType::S16)),
- datasets::BorderModes()),
- framework::dataset::make("filter_size", { 7 })))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
-}
-TEST_SUITE_END() // S16
-TEST_SUITE_END() // Square7x7
-
-TEST_SUITE(Square9x9)
-
-template <typename T>
-using CLConvolutionFixture = ConvolutionSquareValidationFixture<CLTensor, CLAccessor, CLConvolution9x9, T>;
-
-TEST_SUITE(U8)
-FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()),
- framework::dataset::make("DataType",
- DataType::U8)),
- datasets::BorderModes()),
- framework::dataset::make("filter_size", { 9 })))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
-}
-TEST_SUITE_END() // U8
-
-TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()),
- framework::dataset::make("DataType",
- DataType::S16)),
- datasets::BorderModes()),
- framework::dataset::make("filter_size", { 9 })))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
-}
-TEST_SUITE_END() // S16
-TEST_SUITE_END() // Square9x9
-
-TEST_SUITE(Rectangle)
-template <typename T>
-using CLConvolutionFixture = ConvolutionRectangleValidationFixture<CLTensor, CLAccessor, CLConvolutionRectangle, T>;
-
-TEST_SUITE(U8)
-FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()),
- framework::dataset::make("DataType",
- DataType::U8)),
- datasets::BorderModes()),
- framework::dataset::make("filter_width", { 3, 5, 7, 9 })),
- framework::dataset::make("filter_height", { 3, 5, 7, 9 })))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
-}
-TEST_SUITE_END() // U8
-
-TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()),
- framework::dataset::make("DataType",
- DataType::S16)),
- datasets::BorderModes()),
- framework::dataset::make("filter_width", { 3, 5, 7, 9 })),
- framework::dataset::make("filter_height", { 3, 5, 7, 9 })))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
-}
-TEST_SUITE_END() // S16
-TEST_SUITE_END() // Rectangle
-
-TEST_SUITE(Separable5x5)
-template <typename T>
-using CLConvolutionFixture = ConvolutionSeparableValidationFixture<CLTensor, CLAccessor, CLConvolution5x5, T>;
-
-TEST_SUITE(U8)
-FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()),
- framework::dataset::make("DataType",
- DataType::U8)),
- datasets::BorderModes()),
- framework::dataset::make("filter_size", { 5 })))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
-}
-TEST_SUITE_END() // U8
-
-TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()),
- framework::dataset::make("DataType",
- DataType::S16)),
- datasets::BorderModes()),
- framework::dataset::make("filter_size", { 5 })))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
-}
-TEST_SUITE_END() // S16
-TEST_SUITE_END() // Separable5x5
-
-TEST_SUITE(Separable7x7)
-template <typename T>
-using CLConvolutionFixture = ConvolutionSeparableValidationFixture<CLTensor, CLAccessor, CLConvolution7x7, T>;
-
-TEST_SUITE(U8)
-FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()),
- framework::dataset::make("DataType",
- DataType::U8)),
- datasets::BorderModes()),
- framework::dataset::make("filter_size", { 7 })))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
-}
-TEST_SUITE_END() // U8
-
-TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()),
- framework::dataset::make("DataType",
- DataType::S16)),
- datasets::BorderModes()),
- framework::dataset::make("filter_size", { 7 })))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
-}
-TEST_SUITE_END() // S16
-TEST_SUITE_END() // Separable7x7
-
-TEST_SUITE(Separable9x9)
-template <typename T>
-using CLConvolutionFixture = ConvolutionSeparableValidationFixture<CLTensor, CLAccessor, CLConvolution9x9, T>;
-
-TEST_SUITE(U8)
-FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()),
- framework::dataset::make("DataType",
- DataType::U8)),
- datasets::BorderModes()),
- framework::dataset::make("filter_size", { 9 })))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
-}
-TEST_SUITE_END() // U8
-
-TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()),
- framework::dataset::make("DataType",
- DataType::S16)),
- datasets::BorderModes()),
- framework::dataset::make("filter_size", { 9 })))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
-}
-TEST_SUITE_END()
-TEST_SUITE_END() // Separable9x9
-
-TEST_SUITE_END() // Custom Convolution
-TEST_SUITE_END() // CL
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/Derivative.cpp b/tests/validation/CL/Derivative.cpp
deleted file mode 100644
index b8f6856a47..0000000000
--- a/tests/validation/CL/Derivative.cpp
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/CL/functions/CLDerivative.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/BorderModeDataset.h"
-#include "tests/datasets/GradientDimensionDataset.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/DerivativeFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-TEST_SUITE(CL)
-TEST_SUITE(Derivative)
-
-using CLDerivativeFixture = DerivativeValidationFixture<CLTensor, CLAccessor, CLDerivative, uint8_t, int16_t>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDerivativeFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
- Format::U8)),
- datasets::GradientDimensions()))
-{
- // Validate output
- ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
- validate(CLAccessor(_target.first), _reference.first, valid_region_x);
-
- ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
- validate(CLAccessor(_target.second), _reference.second, valid_region_y);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDerivativeFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
- Format::U8)),
- datasets::GradientDimensions()))
-{
- // Validate output
- ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
- validate(CLAccessor(_target.first), _reference.first, valid_region_x);
-
- ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
- validate(CLAccessor(_target.second), _reference.second, valid_region_y);
-}
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/Dilate.cpp b/tests/validation/CL/Dilate.cpp
deleted file mode 100644
index c5fdb3faa7..0000000000
--- a/tests/validation/CL/Dilate.cpp
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/functions/CLDilate.h"
-#include "arm_compute/runtime/Tensor.h"
-#include "arm_compute/runtime/TensorAllocator.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/BorderModeDataset.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/DilateFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-constexpr unsigned int filter_size = 3; /* Size of the kernel/filter in number of elements. */
-constexpr BorderSize border_size(filter_size / 2); /* Border size of the kernel/filter around its central element. */
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(Dilate)
-
-template <typename T>
-using CLDilateFixture = DilateValidationFixture<CLTensor, CLAccessor, CLDilate, T>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDilateFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
- DataType::U8)),
- datasets::BorderModes()))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), border_size));
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDilateFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
- DataType::U8)),
- datasets::BorderModes()))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), border_size));
-}
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/EqualizeHistogram.cpp b/tests/validation/CL/EqualizeHistogram.cpp
deleted file mode 100644
index 3585825782..0000000000
--- a/tests/validation/CL/EqualizeHistogram.cpp
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/CL/functions/CLEqualizeHistogram.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Macros.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/EqualizeHistogramFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-TEST_SUITE(CL)
-TEST_SUITE(EqualizeHistogram)
-
-template <typename T>
-using CLEqualizeHistogramFixture = EqualizeHistogramValidationFixture<CLTensor, CLAccessor, CLEqualizeHistogram, T>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLEqualizeHistogramFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), framework::dataset::make("DataType",
- DataType::U8)))
-{
- // Validate output
- validate(CLAccessor(_target), _reference);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLEqualizeHistogramFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), framework::dataset::make("DataType",
- DataType::U8)))
-{
- // Validate output
- validate(CLAccessor(_target), _reference);
-}
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/Erode.cpp b/tests/validation/CL/Erode.cpp
deleted file mode 100644
index dd75b59a6b..0000000000
--- a/tests/validation/CL/Erode.cpp
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/functions/CLErode.h"
-#include "arm_compute/runtime/Tensor.h"
-#include "arm_compute/runtime/TensorAllocator.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/BorderModeDataset.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/ErodeFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-constexpr unsigned int filter_size = 3; /* Size of the kernel/filter in number of elements. */
-constexpr BorderSize border_size(filter_size / 2); /* Border size of the kernel/filter around its central element. */
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(Erode)
-
-template <typename T>
-using CLErodeFixture = ErodeValidationFixture<CLTensor, CLAccessor, CLErode, T>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLErodeFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
- DataType::U8)),
- datasets::BorderModes()))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), border_size));
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLErodeFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
- DataType::U8)),
- datasets::BorderModes()))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), border_size));
-}
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/FastCorners.cpp b/tests/validation/CL/FastCorners.cpp
deleted file mode 100644
index 40a511e000..0000000000
--- a/tests/validation/CL/FastCorners.cpp
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/functions/CLFastCorners.h"
-#include "arm_compute/runtime/Tensor.h"
-#include "arm_compute/runtime/TensorAllocator.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/CL/CLArrayAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/ImageFileDatasets.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/FastCornersFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-/* Tolerance used to compare corner strengths */
-const AbsoluteTolerance<float> tolerance(0.5f);
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(FastCorners)
-
-template <typename T>
-using CLFastCornersFixture = FastCornersValidationFixture<CLTensor, CLAccessor, CLKeyPointArray, CLFastCorners, T>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLFastCornersFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallImageFiles(), framework::dataset::make("Format", Format::U8)),
- framework::dataset::make("SuppressNonMax", { false, true })),
- framework::dataset::make("BorderMode", BorderMode::UNDEFINED)))
-{
- // Validate output
- CLArrayAccessor<KeyPoint> array(_target);
- validate_keypoints(array.buffer(), array.buffer() + array.num_values(), _reference.begin(), _reference.end(), tolerance);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLFastCornersFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeImageFiles(), framework::dataset::make("Format", Format::U8)),
- framework::dataset::make("SuppressNonMax", { false, true })),
- framework::dataset::make("BorderMode", BorderMode::UNDEFINED)))
-{
- // Validate output
- CLArrayAccessor<KeyPoint> array(_target);
- validate_keypoints(array.buffer(), array.buffer() + array.num_values(), _reference.begin(), _reference.end(), tolerance);
-}
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/Gaussian3x3.cpp b/tests/validation/CL/Gaussian3x3.cpp
deleted file mode 100644
index b7672bdac0..0000000000
--- a/tests/validation/CL/Gaussian3x3.cpp
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/functions/CLGaussian3x3.h"
-#include "arm_compute/runtime/Tensor.h"
-#include "arm_compute/runtime/TensorAllocator.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/BorderModeDataset.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/Gaussian3x3Fixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-constexpr unsigned int filter_size = 3; /* Size of the kernel/filter in number of elements. */
-constexpr BorderSize border_size(filter_size / 2); /* Border size of the kernel/filter around its central element. */
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(Gaussian3x3)
-
-template <typename T>
-using CLGaussian3x3Fixture = Gaussian3x3ValidationFixture<CLTensor, CLAccessor, CLGaussian3x3, T>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGaussian3x3Fixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
- DataType::U8)),
- datasets::BorderModes()))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), border_size));
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLGaussian3x3Fixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
- DataType::U8)),
- datasets::BorderModes()))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), border_size));
-}
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/Gaussian5x5.cpp b/tests/validation/CL/Gaussian5x5.cpp
deleted file mode 100644
index f2a1a30f33..0000000000
--- a/tests/validation/CL/Gaussian5x5.cpp
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/functions/CLGaussian5x5.h"
-#include "arm_compute/runtime/Tensor.h"
-#include "arm_compute/runtime/TensorAllocator.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/BorderModeDataset.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/Gaussian5x5Fixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-constexpr unsigned int filter_size = 5; /* Size of the kernel/filter in number of elements. */
-constexpr BorderSize border_size(filter_size / 2); /* Border size of the kernel/filter around its central element. */
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(Gaussian5x5)
-
-template <typename T>
-using CLGaussian5x5Fixture = Gaussian5x5ValidationFixture<CLTensor, CLAccessor, CLGaussian5x5, T>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGaussian5x5Fixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
- DataType::U8)),
- datasets::BorderModes()))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), border_size));
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLGaussian5x5Fixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
- DataType::U8)),
- datasets::BorderModes()))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), border_size));
-}
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/GaussianPyramid.cpp b/tests/validation/CL/GaussianPyramid.cpp
deleted file mode 100644
index d29f6752d6..0000000000
--- a/tests/validation/CL/GaussianPyramid.cpp
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "arm_compute/runtime/CL/functions/CLGaussianPyramid.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/BorderModeDataset.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/GaussianPyramidHalfFixture.h"
-#include "tests/validation/reference/Utils.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-const auto small_gaussian_pyramid_levels = combine(datasets::Medium2DShapes(), datasets::BorderModes()) * framework::dataset::make("numlevels", 2, 4);
-const auto large_gaussian_pyramid_levels = combine(datasets::Large2DShapes(), datasets::BorderModes()) * framework::dataset::make("numlevels", 2, 5);
-
-template <typename T>
-inline void validate_gaussian_pyramid(const CLPyramid &target, const std::vector<SimpleTensor<T>> &reference, BorderMode border_mode)
-{
- ValidRegion prev_valid_region = shape_to_valid_region(reference[0].shape());
-
- for(size_t i = 1; i < reference.size(); ++i)
- {
- const ValidRegion valid_region = shape_to_valid_region_gaussian_pyramid_half(reference[i - 1].shape(), prev_valid_region, (border_mode == BorderMode::UNDEFINED));
-
- // Validate outputs
- validate(CLAccessor(*(target.get_pyramid_level(i))), reference[i], valid_region);
-
- // Keep the valid region for the next level
- prev_valid_region = valid_region;
- }
-}
-
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(GaussianPyramid)
-TEST_SUITE(Half)
-template <typename T>
-using CLGaussianPyramidHalfFixture = GaussianPyramidHalfValidationFixture<CLTensor, CLAccessor, CLGaussianPyramidHalf, T, CLPyramid>;
-
-FIXTURE_DATA_TEST_CASE(RunSmallGaussianPyramidHalf, CLGaussianPyramidHalfFixture<uint8_t>, framework::DatasetMode::NIGHTLY, small_gaussian_pyramid_levels)
-{
- validate_gaussian_pyramid(_target, _reference, _border_mode);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLargeGaussianPyramidHalf, CLGaussianPyramidHalfFixture<uint8_t>, framework::DatasetMode::NIGHTLY, large_gaussian_pyramid_levels)
-{
- validate_gaussian_pyramid(_target, _reference, _border_mode);
-}
-TEST_SUITE_END()
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/HOGDescriptor.cpp b/tests/validation/CL/HOGDescriptor.cpp
deleted file mode 100644
index a73e563283..0000000000
--- a/tests/validation/CL/HOGDescriptor.cpp
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/CLHOG.h"
-#include "arm_compute/runtime/CL/functions/CLHOGDescriptor.h"
-#include "arm_compute/runtime/Tensor.h"
-#include "arm_compute/runtime/TensorAllocator.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/BorderModeDataset.h"
-#include "tests/datasets/HOGDescriptorDataset.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/HOGDescriptorFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-RelativeTolerance<float> tolerance(0.001f);
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(HOGDescriptor)
-
-using CLHOGDescriptorFixture = HOGDescriptorValidationFixture<CLTensor, CLHOG, CLAccessor, CLHOGDescriptor, uint8_t, float>;
-
-// *INDENT-OFF*
-// clang-format off
-FIXTURE_DATA_TEST_CASE(RunSmall, CLHOGDescriptorFixture, framework::DatasetMode::NIGHTLY,
- combine(combine(
- datasets::SmallHOGDescriptorDataset(),
- framework::dataset::make("Format", Format::U8)),
- framework::dataset::make("BorderMode", {BorderMode::CONSTANT, BorderMode::REPLICATE})))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, tolerance);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLHOGDescriptorFixture, framework::DatasetMode::NIGHTLY,
- combine(combine(
- datasets::LargeHOGDescriptorDataset(),
- framework::dataset::make("Format", Format::U8)),
- framework::dataset::make("BorderMode", {BorderMode::CONSTANT, BorderMode::REPLICATE})))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, tolerance);
-}
-// clang-format on
-// *INDENT-ON*
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/HOGDetector.cpp b/tests/validation/CL/HOGDetector.cpp
deleted file mode 100644
index 2d1904ffe8..0000000000
--- a/tests/validation/CL/HOGDetector.cpp
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/CLArray.h"
-#include "arm_compute/runtime/CL/functions/CLHOGDescriptor.h"
-#include "arm_compute/runtime/CL/functions/CLHOGDetector.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/CL/CLArrayAccessor.h"
-#include "tests/CL/CLHOGAccessor.h"
-#include "tests/datasets/HOGDescriptorDataset.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/HOGDetectorFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-/* Set the tolerance (percentage) used when validating the score of detection window. */
-RelativeTolerance<float> tolerance(0.01f);
-
-/* Input dataset (values must be a multiple of the HOGInfo block_size) */
-const auto DetectionWindowStrideDataset = framework::dataset::make("DetectionWindowStride", { Size2D(8, 8), Size2D(16, 16) });
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(HOGDetector)
-
-// *INDENT-OFF*
-// clang-format off
-using CLHOGDetectorFixture = HOGDetectorValidationFixture<CLTensor,
- CLHOG,
- CLDetectionWindowArray,
- CLHOGDescriptor,
- CLAccessor,
- CLArrayAccessor<DetectionWindow>,
- CLHOGAccessor,
- CLHOGDetector,
- uint8_t,
- float>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLHOGDetectorFixture, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(
- DetectionWindowStrideDataset,
- datasets::SmallHOGDescriptorDataset()),
- framework::dataset::make("Format", Format::U8)),
- framework::dataset::make("BorderMode", {BorderMode::CONSTANT, BorderMode::REPLICATE})))
-
-{
- // Validate output
- validate_detection_windows(_target.begin(), _target.end(), _reference.begin(), _reference.end(), tolerance);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLHOGDetectorFixture, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(
- DetectionWindowStrideDataset,
- datasets::LargeHOGDescriptorDataset()),
- framework::dataset::make("Format", Format::U8)),
- framework::dataset::make("BorderMode", {BorderMode::CONSTANT, BorderMode::REPLICATE})))
-{
- // Validate output
- validate_detection_windows(_target.begin(), _target.end(), _reference.begin(), _reference.end(), tolerance);
-}
-
-// clang-format on
-// *INDENT-ON*
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/HOGMultiDetection.cpp b/tests/validation/CL/HOGMultiDetection.cpp
deleted file mode 100644
index 4ca1dab32e..0000000000
--- a/tests/validation/CL/HOGMultiDetection.cpp
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/CLMultiHOG.h"
-#include "arm_compute/runtime/CL/functions/CLHOGDescriptor.h"
-#include "arm_compute/runtime/CL/functions/CLHOGMultiDetection.h"
-#include "arm_compute/runtime/Tensor.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/CL/CLArrayAccessor.h"
-#include "tests/CL/CLHOGAccessor.h"
-#include "tests/datasets/HOGMultiDetectionDataset.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/HOGMultiDetectionFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-/* Set the tolerance (percentage) used when validating the strength of detection window. */
-RelativeTolerance<float> tolerance(0.1f);
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(HOGMultiDetection)
-
-// *INDENT-OFF*
-// clang-format off
-using CLHOGMultiDetectionFixture = HOGMultiDetectionValidationFixture<CLTensor,
- CLHOG,
- CLMultiHOG,
- CLDetectionWindowArray,
- CLSize2DArray,
- CLAccessor,
- CLArrayAccessor<Size2D>,
- CLArrayAccessor<DetectionWindow>,
- CLHOGAccessor,
- CLHOGMultiDetection,
- uint8_t,
- float>;
-
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLHOGMultiDetectionFixture, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(
- datasets::SmallHOGMultiDetectionDataset(),
- framework::dataset::make("Format", Format::U8)),
- framework::dataset::make("BorderMode", {BorderMode::CONSTANT, BorderMode::REPLICATE})),
- framework::dataset::make("NonMaximaSuppression", {false, true})))
-{
- // Validate output
- validate_detection_windows(_target.begin(), _target.end(), _reference.begin(), _reference.end(), tolerance);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLHOGMultiDetectionFixture, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(
- datasets::LargeHOGMultiDetectionDataset(),
- framework::dataset::make("Format", Format::U8)),
- framework::dataset::make("BorderMode", {BorderMode::CONSTANT, BorderMode::REPLICATE})),
- framework::dataset::make("NonMaximaSuppression", {false, true})))
-{
- // Validate output
- validate_detection_windows(_target.begin(), _target.end(), _reference.begin(), _reference.end(), tolerance);
-}
-
-// clang-format on
-// *INDENT-ON*
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/HarrisCorners.cpp b/tests/validation/CL/HarrisCorners.cpp
deleted file mode 100644
index 157102f268..0000000000
--- a/tests/validation/CL/HarrisCorners.cpp
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLArray.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "arm_compute/runtime/CL/functions/CLHarrisCorners.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/CL/CLArrayAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/BorderModeDataset.h"
-#include "tests/datasets/ImageFileDatasets.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/HarrisCornersFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-const auto data_nightly = combine(framework::dataset::make("GradientSize", { 3, 5, 7 }), combine(framework::dataset::make("BlockSize", { 3, 5, 7 }), datasets::BorderModes()));
-const auto data_precommit = combine(framework::dataset::make("GradientSize", { 3 }), combine(framework::dataset::make("BlockSize", { 3 }), datasets::BorderModes()));
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(HarrisCorners)
-
-template <typename T>
-using CLHarrisCornersFixture = HarrisCornersValidationFixture<CLTensor, CLAccessor, CLKeyPointArray, CLHarrisCorners, T>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLHarrisCornersFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SmallImageFiles(), data_precommit), framework::dataset::make("Format",
- Format::U8)))
-{
- // Validate output
- CLArrayAccessor<KeyPoint> array(_target);
- validate_keypoints(array.buffer(), array.buffer() + array.num_values(), _reference.begin(), _reference.end(), RelativeTolerance<float>(0.0001f));
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLHarrisCornersFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeImageFiles(), data_nightly), framework::dataset::make("Format",
- Format::U8)))
-{
- // Validate output
- CLArrayAccessor<KeyPoint> array(_target);
- validate_keypoints(array.buffer(), array.buffer() + array.num_values(), _reference.begin(), _reference.end(), RelativeTolerance<float>(0.0001f));
-}
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/Histogram.cpp b/tests/validation/CL/Histogram.cpp
deleted file mode 100644
index 2619a0067c..0000000000
--- a/tests/validation/CL/Histogram.cpp
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLDistribution1D.h"
-#include "arm_compute/runtime/CL/functions/CLHistogram.h"
-#include "arm_compute/runtime/Tensor.h"
-#include "arm_compute/runtime/TensorAllocator.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/BorderModeDataset.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/HistogramFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-TEST_SUITE(CL)
-TEST_SUITE(Histogram)
-
-template <typename T>
-using CLHistogramFixture = HistogramValidationFixture<CLTensor, CLAccessor, CLHistogram, T, CLDistribution1D>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLHistogramFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), framework::dataset::make("DataType",
- DataType::U8)))
-{
- // Validate output
- validate(CLAccessor(_target), _reference);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLHistogramFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), framework::dataset::make("DataType",
- DataType::U8)))
-{
- // Validate output
- validate(CLAccessor(_target), _reference);
-}
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/IntegralImage.cpp b/tests/validation/CL/IntegralImage.cpp
deleted file mode 100644
index e3b728ab9c..0000000000
--- a/tests/validation/CL/IntegralImage.cpp
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLIntegralImage.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Macros.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/IntegralImageFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-TEST_SUITE(CL)
-TEST_SUITE(IntegralImage)
-template <typename T>
-using CLIntegralImageFixture = IntegralImageValidationFixture<CLTensor, CLAccessor, CLIntegralImage, T>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLIntegralImageFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::SmallShapes(), framework::dataset::make("DataType",
- DataType::U8)))
-{
- // Validate output
- validate(CLAccessor(_target), _reference);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLIntegralImageFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), framework::dataset::make("DataType",
- DataType::U8)))
-{
- // Validate output
- validate(CLAccessor(_target), _reference);
-}
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/LaplacianPyramid.cpp b/tests/validation/CL/LaplacianPyramid.cpp
deleted file mode 100644
index 801115ea09..0000000000
--- a/tests/validation/CL/LaplacianPyramid.cpp
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLPyramid.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "arm_compute/runtime/CL/functions/CLLaplacianPyramid.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/datasets/BorderModeDataset.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/LaplacianPyramidFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-/* Absolute tolerance value for comparing reference's output against implementation's output for DataType::S16
- * Tolerance is needed for calculation uncertainties introduced from the layers
- */
-AbsoluteTolerance<int16_t> tolerance_int16(1);
-const auto small_laplacian_pyramid_levels = framework::dataset::make("NumLevels", 2, 3);
-const auto large_laplacian_pyramid_levels = framework::dataset::make("NumLevels", 2, 5);
-
-const auto formats = combine(framework::dataset::make("FormatIn", Format::U8), framework::dataset::make("FormatOut", Format::S16));
-
-template <typename T>
-inline void validate_laplacian_pyramid(const CLPyramid &target, const std::vector<SimpleTensor<T>> &reference, BorderMode border_mode)
-{
- CLTensor *level_image = target.get_pyramid_level(0);
- ValidRegion valid_region = shape_to_valid_region(reference[0].shape(), border_mode == BorderMode::UNDEFINED, BorderSize(2));
-
- // Validate lowest level
- validate(CLAccessor(*level_image), reference[0], valid_region);
-
- // Validate remaining levels
- for(size_t lev = 1; lev < target.info()->num_levels(); lev++)
- {
- level_image = target.get_pyramid_level(lev);
- CLTensor *prev_level_image = target.get_pyramid_level(lev - 1);
-
- valid_region = shape_to_valid_region_laplacian_pyramid(prev_level_image->info()->tensor_shape(),
- prev_level_image->info()->valid_region(),
- border_mode == BorderMode::UNDEFINED);
-
- // Validate level
- validate(CLAccessor(*level_image), reference[lev], valid_region, tolerance_int16);
- }
-}
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(LaplacianPyramid)
-
-// *INDENT-OFF*
-// clang-format off
-
-using CLLaplacianPyramidFixture = LaplacianPyramidValidationFixture<CLTensor, CLAccessor, CLLaplacianPyramid, uint8_t, int16_t, CLPyramid>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLLaplacianPyramidFixture, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(
- datasets::Medium2DShapes(),
- datasets::BorderModes()),
- small_laplacian_pyramid_levels),
- formats))
-{
- validate_laplacian_pyramid(_target, _reference, _border_mode);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLLaplacianPyramidFixture, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(
- datasets::Large2DShapes(),
- datasets::BorderModes()),
- large_laplacian_pyramid_levels),
- formats))
-{
- validate_laplacian_pyramid(_target, _reference, _border_mode);
-}
-// clang-format on
-// *INDENT-ON*
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/LaplacianReconstruct.cpp b/tests/validation/CL/LaplacianReconstruct.cpp
deleted file mode 100644
index e6e32ce910..0000000000
--- a/tests/validation/CL/LaplacianReconstruct.cpp
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLPyramid.h"
-#include "arm_compute/runtime/CL/functions/CLLaplacianPyramid.h"
-#include "arm_compute/runtime/CL/functions/CLLaplacianReconstruct.h"
-#include "arm_compute/runtime/Tensor.h"
-#include "arm_compute/runtime/TensorAllocator.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/datasets/BorderModeDataset.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/LaplacianReconstructFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-const auto small_laplacian_reconstruct_levels = framework::dataset::make("NumLevels", 2, 3);
-const auto large_laplacian_reconstruct_levels = framework::dataset::make("NumLevels", 2, 5);
-
-const auto formats = combine(framework::dataset::make("FormatIn", Format::S16), framework::dataset::make("FormatOut", Format::U8));
-
-template <typename T>
-void validate_laplacian_reconstruct(CLTensor &target, const SimpleTensor<T> &reference, BorderMode border_mode, size_t num_levels)
-{
- const unsigned int filter_size = 5;
- const unsigned int border_size(filter_size / 2);
-
- BorderSize border(std::pow(border_size, num_levels));
-
- // Validate output
- ValidRegion valid_region = shape_to_valid_region(reference.shape(), border_mode == BorderMode::UNDEFINED, border);
- validate(CLAccessor(target), reference, valid_region);
-}
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(LaplacianReconstruct)
-
-// *INDENT-OFF*
-// clang-format off
-
-using CLLaplacianReconstructFixture = LaplacianReconstructValidationFixture<CLTensor, CLAccessor, CLLaplacianReconstruct, CLLaplacianPyramid, int16_t, uint8_t, CLPyramid>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLLaplacianReconstructFixture, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(
- datasets::Medium2DShapes(),
- datasets::BorderModes()),
- small_laplacian_reconstruct_levels),
- formats))
-{
- validate_laplacian_reconstruct(_target, _reference, _border_mode, _pyramid_levels);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLLaplacianReconstructFixture, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(
- datasets::Large2DShapes(),
- datasets::BorderModes()),
- large_laplacian_reconstruct_levels),
- formats))
-{
- validate_laplacian_reconstruct(_target, _reference, _border_mode, _pyramid_levels);
-}
-// clang-format on
-// *INDENT-ON*
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/Magnitude.cpp b/tests/validation/CL/Magnitude.cpp
deleted file mode 100644
index bf5879b527..0000000000
--- a/tests/validation/CL/Magnitude.cpp
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLMagnitude.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/MagnitudeFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-template <typename T>
-AbsoluteTolerance<T> tolerance(MagnitudeType magnitude_type)
-{
- return AbsoluteTolerance<T>((MagnitudeType::L1NORM == magnitude_type) ? 0 : 1);
-}
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(Magnitude)
-
-template <typename T>
-using CLMagnitudeFixture = MagnitudeValidationFixture<CLTensor, CLAccessor, CLMagnitude, T>;
-
-TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLMagnitudeFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SmallShapes(), framework::dataset::make("Format", Format::S16)),
- framework::dataset::make("MagnitudeType", { MagnitudeType::L1NORM, MagnitudeType::L2NORM })))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, tolerance<int16_t>(_magnitude_type));
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLMagnitudeFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::Large2DShapes(), framework::dataset::make("Format", Format::S16)),
- framework::dataset::make("MagnitudeType", { MagnitudeType::L1NORM, MagnitudeType::L2NORM })))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, tolerance<int16_t>(_magnitude_type));
-}
-TEST_SUITE_END() // S16
-
-TEST_SUITE(S32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLMagnitudeFixture<int32_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SmallShapes(), framework::dataset::make("Format", Format::S32)),
- framework::dataset::make("MagnitudeType", { MagnitudeType::L1NORM, MagnitudeType::L2NORM })))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, tolerance<int32_t>(_magnitude_type));
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLMagnitudeFixture<int32_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::Large2DShapes(), framework::dataset::make("Format", Format::S32)),
- framework::dataset::make("MagnitudeType", { MagnitudeType::L1NORM, MagnitudeType::L2NORM })))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, tolerance<int32_t>(_magnitude_type));
-}
-TEST_SUITE_END() // S32
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/MeanStdDev.cpp b/tests/validation/CL/MeanStdDev.cpp
deleted file mode 100644
index 0e5135ec44..0000000000
--- a/tests/validation/CL/MeanStdDev.cpp
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLMeanStdDev.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Macros.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/MeanStdDevFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-RelativeTolerance<float> tolerance_rel_high_error(0.05f);
-RelativeTolerance<float> tolerance_rel_low_error(0.0005f);
-AbsoluteTolerance<float> tolerance_rel_high_error_f32(0.01f);
-AbsoluteTolerance<float> tolerance_rel_low_error_f32(0.00001f);
-AbsoluteTolerance<float> tolerance_rel_high_error_f16(0.1f);
-AbsoluteTolerance<float> tolerance_rel_low_error_f16(0.01f);
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(MeanStdDev)
-
-// *INDENT-OFF*
-// clang-format off
-DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(
- framework::dataset::make("InputInfo", { TensorInfo(TensorShape(16U, 16U), 1, DataType::F32), // Wrong input data type
- TensorInfo(TensorShape(16U, 5U, 16U), 1, DataType::U8), // Invalid shape
- TensorInfo(TensorShape(16U, 16U), 1, DataType::U8), // Valid
- }),
- framework::dataset::make("Expected", { false, false, true })),
- input_info, expected)
-{
- ARM_COMPUTE_EXPECT(bool(CLMeanStdDev::validate(&input_info.clone()->set_is_resizable(false), nullptr, nullptr)) == expected, framework::LogLevel::ERRORS);
-}
-// clang-format on
-// *INDENT-ON*
-
-template <typename T>
-using CLMeanStdDevFixture = MeanStdDevValidationFixture<CLTensor, CLAccessor, CLMeanStdDev, T>;
-
-TEST_SUITE(U8)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLMeanStdDevFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), framework::dataset::make("DataType",
- DataType::U8)))
-{
- // Validate mean output
- validate(_target.first, _reference.first);
-
- // Validate std_dev output
- validate(_target.second, _reference.second, tolerance_rel_high_error);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLMeanStdDevFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), framework::dataset::make("DataType",
- DataType::U8)))
-{
- // Validate mean output
- validate(_target.first, _reference.first, tolerance_rel_low_error);
-
- // Validate std_dev output
- validate(_target.second, _reference.second, tolerance_rel_high_error);
-}
-TEST_SUITE_END() // U8
-
-TEST_SUITE(F16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLMeanStdDevFixture<half>, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), framework::dataset::make("DataType",
- DataType::F16)))
-{
- // Validate mean output
- validate(_target.first, _reference.first, tolerance_rel_low_error_f16);
-
- // Validate std_dev output
- validate(_target.second, _reference.second, tolerance_rel_high_error_f16);
-}
-TEST_SUITE_END() // F16
-
-TEST_SUITE(F32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLMeanStdDevFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), framework::dataset::make("DataType",
- DataType::F32)))
-{
- // Validate mean output
- validate(_target.first, _reference.first, tolerance_rel_low_error_f32);
-
- // Validate std_dev output
- validate(_target.second, _reference.second, tolerance_rel_high_error_f32);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLMeanStdDevFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), framework::dataset::make("DataType",
- DataType::F32)))
-{
- // Validate mean output
- validate(_target.first, _reference.first, tolerance_rel_low_error_f32);
-
- // Validate std_dev output
- validate(_target.second, _reference.second, tolerance_rel_high_error_f32);
-}
-TEST_SUITE_END() // F32
-
-TEST_SUITE_END() // MeanStdDev
-TEST_SUITE_END() // CL
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/Median3x3.cpp b/tests/validation/CL/Median3x3.cpp
deleted file mode 100644
index 9a09ae5be5..0000000000
--- a/tests/validation/CL/Median3x3.cpp
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/functions/CLMedian3x3.h"
-#include "arm_compute/runtime/Tensor.h"
-#include "arm_compute/runtime/TensorAllocator.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/BorderModeDataset.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/Median3x3Fixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-constexpr unsigned int filter_size = 3; /* Size of the kernel/filter in number of elements. */
-constexpr BorderSize border_size(filter_size / 2); /* Border size of the kernel/filter around its central element. */
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(Median3x3)
-template <typename T>
-using CLMedian3x3Fixture = Median3x3ValidationFixture<CLTensor, CLAccessor, CLMedian3x3, T>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLMedian3x3Fixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
- DataType::U8)),
- datasets::BorderModes()))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), border_size));
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLMedian3x3Fixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
- DataType::U8)),
- datasets::BorderModes()))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), border_size));
-}
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/MinMaxLocation.cpp b/tests/validation/CL/MinMaxLocation.cpp
deleted file mode 100644
index 1ad863d90e..0000000000
--- a/tests/validation/CL/MinMaxLocation.cpp
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLMinMaxLocation.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/CL/CLArrayAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Macros.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/MinMaxLocationFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-TEST_SUITE(CL)
-TEST_SUITE(MinMaxLocation)
-
-template <typename T>
-using CLMinMaxLocationFixture = MinMaxLocationValidationFixture<CLTensor, CLAccessor, CLArray<Coordinates2D>, CLArrayAccessor<Coordinates2D>, CLMinMaxLocation, T>;
-
-TEST_SUITE(U8)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLMinMaxLocationFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), framework::dataset::make("DataType",
- DataType::U8)))
-{
- validate_min_max_loc(_target, _reference);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLMinMaxLocationFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), framework::dataset::make("DataType",
- DataType::U8)))
-{
- validate_min_max_loc(_target, _reference);
-}
-
-TEST_SUITE_END() // U8
-
-TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLMinMaxLocationFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), framework::dataset::make("DataType",
- DataType::S16)))
-{
- validate_min_max_loc(_target, _reference);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLMinMaxLocationFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), framework::dataset::make("DataType",
- DataType::S16)))
-{
- validate_min_max_loc(_target, _reference);
-}
-
-TEST_SUITE_END() // S16
-
-TEST_SUITE(Float)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLMinMaxLocationFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), framework::dataset::make("DataType",
- DataType::F32)))
-{
- validate_min_max_loc(_target, _reference);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLMinMaxLocationFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), framework::dataset::make("DataType",
- DataType::F32)))
-{
- validate_min_max_loc(_target, _reference);
-}
-
-TEST_SUITE_END() // F32
-
-TEST_SUITE_END() // MinMaxLocation
-TEST_SUITE_END() // CL
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/NonLinearFilter.cpp b/tests/validation/CL/NonLinearFilter.cpp
deleted file mode 100644
index 3fd9d5cd73..0000000000
--- a/tests/validation/CL/NonLinearFilter.cpp
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLNonLinearFilter.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/BorderModeDataset.h"
-#include "tests/datasets/MatrixPatternDataset.h"
-#include "tests/datasets/NonLinearFilterFunctionDataset.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Macros.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/NonLinearFilterFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-TEST_SUITE(CL)
-TEST_SUITE(NonLinearFilter)
-
-template <typename T>
-using CLNonLinearFilterFixture = NonLinearFilterValidationFixture<CLTensor, CLAccessor, CLNonLinearFilter, T>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLNonLinearFilterFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(datasets::SmallShapes(),
- datasets::NonLinearFilterFunctions()),
- framework::dataset::make("MaskSize", { 3U, 5U })),
- datasets::MatrixPatterns()),
- datasets::BorderModes()),
- framework::dataset::make("DataType", DataType::U8)))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), _border_size));
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLNonLinearFilterFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(datasets::LargeShapes(),
- datasets::NonLinearFilterFunctions()),
- framework::dataset::make("MaskSize", { 3U, 5U })),
- datasets::MatrixPatterns()),
- datasets::BorderModes()),
- framework::dataset::make("DataType", DataType::U8)))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), _border_size));
-}
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/OpticalFlow.cpp b/tests/validation/CL/OpticalFlow.cpp
deleted file mode 100644
index 3636a8f847..0000000000
--- a/tests/validation/CL/OpticalFlow.cpp
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/CLArray.h"
-#include "arm_compute/runtime/CL/CLPyramid.h"
-#include "arm_compute/runtime/CL/functions/CLGaussianPyramid.h"
-#include "arm_compute/runtime/CL/functions/CLOpticalFlow.h"
-#include "arm_compute/runtime/Tensor.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/CL/CLArrayAccessor.h"
-#include "tests/datasets/BorderModeDataset.h"
-#include "tests/datasets/OpticalFlowDataset.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/OpticalFlowFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-TEST_SUITE(CL)
-TEST_SUITE(OpticalFlow)
-
-// *INDENT-OFF*
-// clang-format off
-using CLOpticalFlowFixture = OpticalFlowValidationFixture<CLTensor,
- CLAccessor,
- CLKeyPointArray,
- CLArrayAccessor<KeyPoint>,
- CLOpticalFlow,
- CLPyramid,
- CLGaussianPyramidHalf,
- uint8_t>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLOpticalFlowFixture, framework::DatasetMode::NIGHTLY, combine(combine(
- datasets::SmallOpticalFlowDataset(),
- framework::dataset::make("Format", Format::U8)),
- datasets::BorderModes()))
-{
- // Validate output
- CLArrayAccessor<KeyPoint> array(_target);
-
- validate_keypoints(array.buffer(),
- array.buffer() + array.num_values(),
- _reference.begin(),
- _reference.end());
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLOpticalFlowFixture, framework::DatasetMode::NIGHTLY, combine(combine(
- datasets::LargeOpticalFlowDataset(),
- framework::dataset::make("Format", Format::U8)),
- datasets::BorderModes()))
-{
- // Validate output
- CLArrayAccessor<KeyPoint> array(_target);
-
- validate_keypoints(array.buffer(),
- array.buffer() + array.num_values(),
- _reference.begin(),
- _reference.end());
-}
-// clang-format on
-// *INDENT-ON*
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/Phase.cpp b/tests/validation/CL/Phase.cpp
deleted file mode 100644
index be7f9df2be..0000000000
--- a/tests/validation/CL/Phase.cpp
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLPhase.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/PhaseFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-constexpr AbsoluteTolerance<uint8_t> tolerance_value(1);
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(Phase)
-
-template <typename T>
-using CLPhaseFixture = PhaseValidationFixture<CLTensor, CLAccessor, CLPhase, T>;
-
-TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLPhaseFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SmallShapes(), framework::dataset::make("Format", Format::S16)),
- framework::dataset::make("PhaseType", { PhaseType::SIGNED, PhaseType::UNSIGNED })))
-{
- // Validate output
- validate_wrap(CLAccessor(_target), _reference, tolerance_value, 0);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLPhaseFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::Large2DShapes(), framework::dataset::make("Format", Format::S16)),
- framework::dataset::make("PhaseType", { PhaseType::SIGNED, PhaseType::UNSIGNED })))
-{
- // Validate output
- validate_wrap(CLAccessor(_target), _reference, tolerance_value, 0);
-}
-TEST_SUITE_END() // S16
-
-TEST_SUITE(S32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLPhaseFixture<int32_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SmallShapes(), framework::dataset::make("Format", Format::S32)),
- framework::dataset::make("PhaseType", { PhaseType::SIGNED, PhaseType::UNSIGNED })))
-{
- // Validate output
- validate_wrap(CLAccessor(_target), _reference, tolerance_value, 0);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLPhaseFixture<int32_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::Large2DShapes(), framework::dataset::make("Format", Format::S32)),
- framework::dataset::make("PhaseType", { PhaseType::SIGNED, PhaseType::UNSIGNED })))
-{
- // Validate output
- validate_wrap(CLAccessor(_target), _reference, tolerance_value, 0);
-}
-TEST_SUITE_END() // S32
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/Scharr.cpp b/tests/validation/CL/Scharr.cpp
deleted file mode 100644
index ed1fec82fd..0000000000
--- a/tests/validation/CL/Scharr.cpp
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLScharr3x3.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/BorderModeDataset.h"
-#include "tests/datasets/GradientDimensionDataset.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/ScharrFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-TEST_SUITE(CL)
-TEST_SUITE(Scharr)
-
-TEST_SUITE(W3x3)
-using CLScharr3x3Fixture = ScharrValidationFixture<CLTensor, CLAccessor, CLScharr3x3, uint8_t, int16_t>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLScharr3x3Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
- Format::U8)),
- datasets::GradientDimensions()))
-{
- // Validate output
- ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
- validate(CLAccessor(_target.first), _reference.first, valid_region_x);
-
- ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
- validate(CLAccessor(_target.second), _reference.second, valid_region_y);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLScharr3x3Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
- Format::U8)),
- datasets::GradientDimensions()))
-{
- // Validate output
- ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
- validate(CLAccessor(_target.first), _reference.first, valid_region_x);
-
- ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
- validate(CLAccessor(_target.second), _reference.second, valid_region_y);
-}
-TEST_SUITE_END()
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/Sobel.cpp b/tests/validation/CL/Sobel.cpp
deleted file mode 100644
index 3aee0fe2e1..0000000000
--- a/tests/validation/CL/Sobel.cpp
+++ /dev/null
@@ -1,259 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "arm_compute/runtime/CL/functions/CLSobel3x3.h"
-#include "arm_compute/runtime/CL/functions/CLSobel5x5.h"
-#include "arm_compute/runtime/CL/functions/CLSobel7x7.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/BorderModeDataset.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/SobelFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-TEST_SUITE(CL)
-TEST_SUITE(Sobel)
-
-TEST_SUITE(W3x3)
-using CLSobel3x3Fixture = SobelValidationFixture<CLTensor, CLAccessor, CLSobel3x3, uint8_t, int16_t>;
-
-TEST_SUITE(X)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLSobel3x3Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
- Format::U8)),
- framework::dataset::make("GradientDimension", GradientDimension::GRAD_X)))
-{
- // Validate output
- ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
- validate(CLAccessor(_target.first), _reference.first, valid_region_x);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLSobel3x3Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
- Format::U8)),
- framework::dataset::make("GradientDimension", GradientDimension::GRAD_X)))
-{
- // Validate output
- ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
- validate(CLAccessor(_target.first), _reference.first, valid_region_x);
-}
-TEST_SUITE_END()
-
-TEST_SUITE(Y)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLSobel3x3Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
- Format::U8)),
- framework::dataset::make("GradientDimension", GradientDimension::GRAD_Y)))
-{
- // Validate output
- ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
- validate(CLAccessor(_target.second), _reference.second, valid_region_y);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLSobel3x3Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
- Format::U8)),
- framework::dataset::make("GradientDimension", GradientDimension::GRAD_Y)))
-{
- // Validate output
- ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
- validate(CLAccessor(_target.second), _reference.second, valid_region_y);
-}
-TEST_SUITE_END()
-
-TEST_SUITE(XY)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLSobel3x3Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
- Format::U8)),
- framework::dataset::make("GradientDimension", GradientDimension::GRAD_XY)))
-{
- // Validate output
- ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
- validate(CLAccessor(_target.first), _reference.first, valid_region_x);
-
- ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
- validate(CLAccessor(_target.second), _reference.second, valid_region_y);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLSobel3x3Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
- Format::U8)),
- framework::dataset::make("GradientDimension", GradientDimension::GRAD_XY)))
-{
- // Validate output
- ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
- validate(CLAccessor(_target.first), _reference.first, valid_region_x);
-
- ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
- validate(CLAccessor(_target.second), _reference.second, valid_region_y);
-}
-TEST_SUITE_END()
-TEST_SUITE_END()
-
-TEST_SUITE(W5x5)
-using CLSobel5x5Fixture = SobelValidationFixture<CLTensor, CLAccessor, CLSobel5x5, uint8_t, int16_t>;
-
-TEST_SUITE(X)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLSobel5x5Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
- Format::U8)),
- framework::dataset::make("GradientDimension", GradientDimension::GRAD_X)))
-{
- // Validate output
- ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(2));
- validate(CLAccessor(_target.first), _reference.first, valid_region_x);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLSobel5x5Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
- Format::U8)),
- framework::dataset::make("GradientDimension", GradientDimension::GRAD_X)))
-{
- // Validate output
- ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(2));
- validate(CLAccessor(_target.first), _reference.first, valid_region_x);
-}
-TEST_SUITE_END()
-TEST_SUITE(Y)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLSobel5x5Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
- Format::U8)),
- framework::dataset::make("GradientDimension", GradientDimension::GRAD_Y)))
-{
- // Validate output
- ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(2));
- validate(CLAccessor(_target.second), _reference.second, valid_region_y);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLSobel5x5Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
- Format::U8)),
- framework::dataset::make("GradientDimension", GradientDimension::GRAD_Y)))
-{
- // Validate output
- ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(2));
- validate(CLAccessor(_target.second), _reference.second, valid_region_y);
-}
-TEST_SUITE_END()
-TEST_SUITE(XY)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLSobel5x5Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
- Format::U8)),
- framework::dataset::make("GradientDimension", GradientDimension::GRAD_XY)))
-{
- // Validate output
- ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(2));
- validate(CLAccessor(_target.first), _reference.first, valid_region_x);
-
- ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(2));
- validate(CLAccessor(_target.second), _reference.second, valid_region_y);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLSobel5x5Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
- Format::U8)),
- framework::dataset::make("GradientDimension", GradientDimension::GRAD_XY)))
-{
- // Validate output
- ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(2));
- validate(CLAccessor(_target.first), _reference.first, valid_region_x);
-
- ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(2));
- validate(CLAccessor(_target.second), _reference.second, valid_region_y);
-}
-TEST_SUITE_END()
-TEST_SUITE_END()
-
-TEST_SUITE(W7x7)
-using CLSobel7x7Fixture = SobelValidationFixture<CLTensor, CLAccessor, CLSobel7x7, uint8_t, int32_t>;
-
-TEST_SUITE(X)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLSobel7x7Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
- Format::U8)),
- framework::dataset::make("GradientDimension", GradientDimension::GRAD_X)))
-{
- // Validate output
- ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(3));
- validate(CLAccessor(_target.first), _reference.first, valid_region_x);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLSobel7x7Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
- Format::U8)),
- framework::dataset::make("GradientDimension", GradientDimension::GRAD_X)))
-{
- // Validate output
- ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(3));
- validate(CLAccessor(_target.first), _reference.first, valid_region_x);
-}
-TEST_SUITE_END()
-TEST_SUITE(Y)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLSobel7x7Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
- Format::U8)),
- framework::dataset::make("GradientDimension", GradientDimension::GRAD_Y)))
-{
- // Validate output
- ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(3));
- validate(CLAccessor(_target.second), _reference.second, valid_region_y);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLSobel7x7Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
- Format::U8)),
- framework::dataset::make("GradientDimension", GradientDimension::GRAD_Y)))
-{
- // Validate output
- ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(3));
- validate(CLAccessor(_target.second), _reference.second, valid_region_y);
-}
-TEST_SUITE_END()
-TEST_SUITE(XY)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLSobel7x7Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
- Format::U8)),
- framework::dataset::make("GradientDimension", GradientDimension::GRAD_XY)))
-{
- // Validate output
- ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(3));
- validate(CLAccessor(_target.first), _reference.first, valid_region_x);
-
- ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(3));
- validate(CLAccessor(_target.second), _reference.second, valid_region_y);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLSobel7x7Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
- Format::U8)),
- framework::dataset::make("GradientDimension", GradientDimension::GRAD_XY)))
-{
- // Validate output
- ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(3));
- validate(CLAccessor(_target.first), _reference.first, valid_region_x);
-
- ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(3));
- validate(CLAccessor(_target.second), _reference.second, valid_region_y);
-}
-TEST_SUITE_END()
-TEST_SUITE_END()
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/TableLookup.cpp b/tests/validation/CL/TableLookup.cpp
deleted file mode 100644
index 415b91c42d..0000000000
--- a/tests/validation/CL/TableLookup.cpp
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "arm_compute/runtime/CL/functions/CLTableLookup.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/CL/CLLutAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Helpers.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/TableLookupFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-TEST_SUITE(CL)
-TEST_SUITE(TableLookup)
-
-template <typename T>
-using CLTableLookupFixture = TableLookupValidationFixture<CLTensor, CLAccessor, CLTableLookup, CLLutAccessor<T>, CLLut, T>;
-TEST_SUITE(U8)
-
-FIXTURE_DATA_TEST_CASE(RunSmallU8, CLTableLookupFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::U8)))
-{
- // Validate output
- validate(CLAccessor(_target), _reference);
-}
-FIXTURE_DATA_TEST_CASE(RunLargeU8, CLTableLookupFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::U8)))
-{
- // Validate output
- validate(CLAccessor(_target), _reference);
-}
-TEST_SUITE_END()
-
-TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunSmallS16, CLTableLookupFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::S16)))
-{
- // Validate output
- validate(CLAccessor(_target), _reference);
-}
-FIXTURE_DATA_TEST_CASE(RunLargeS16, CLTableLookupFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::S16)))
-{
- // Validate output
- validate(CLAccessor(_target), _reference);
-}
-TEST_SUITE_END()
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/Threshold.cpp b/tests/validation/CL/Threshold.cpp
deleted file mode 100644
index be26245b7e..0000000000
--- a/tests/validation/CL/Threshold.cpp
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLThreshold.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/datasets/ThresholdDataset.h"
-#include "tests/framework/Macros.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/ThresholdFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-TEST_SUITE(CL)
-TEST_SUITE(Threshold)
-
-template <typename T>
-using CLThresholdFixture = ThresholdValidationFixture<CLTensor, CLAccessor, CLThreshold, T>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLThresholdFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SmallShapes(), datasets::MixedThresholdDataset()),
- framework::dataset::make("DataType",
- DataType::U8)))
-{
- // Validate output
- validate(CLAccessor(_target), _reference);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLThresholdFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), datasets::MixedThresholdDataset()),
- framework::dataset::make("DataType",
- DataType::U8)))
-{
- // Validate output
- validate(CLAccessor(_target), _reference);
-}
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/WarpAffine.cpp b/tests/validation/CL/WarpAffine.cpp
deleted file mode 100644
index d10ba7f502..0000000000
--- a/tests/validation/CL/WarpAffine.cpp
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/runtime/CL/functions/CLWarpAffine.h"
-#include "arm_compute/runtime/Tensor.h"
-#include "arm_compute/runtime/TensorAllocator.h"
-#include "tests/AssetsLibrary.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/BorderModeDataset.h"
-#include "tests/datasets/InterpolationPolicyDataset.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/WarpAffineFixture.h"
-#include "tests/validation/reference/Utils.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-/** Tolerance */
-constexpr AbsoluteTolerance<uint8_t> tolerance(1);
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(WarpAffine)
-
-template <typename T>
-using CLWarpAffineFixture = WarpAffineValidationFixture<CLTensor, CLAccessor, CLWarpAffine, T>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLWarpAffineFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::U8)),
- framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
- datasets::BorderModes()))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, _valid_mask, tolerance, 0.02f);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLWarpAffineFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::U8)),
- framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
- datasets::BorderModes()))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, _valid_mask, tolerance, 0.02f);
-}
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/NEON/Convolution.cpp b/tests/validation/NEON/Convolution.cpp
deleted file mode 100644
index 2fb43273b1..0000000000
--- a/tests/validation/NEON/Convolution.cpp
+++ /dev/null
@@ -1,295 +0,0 @@
-/*
- * Copyright (c) 2017-2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/functions/NEConvolution.h"
-#include "arm_compute/runtime/Tensor.h"
-#include "arm_compute/runtime/TensorAllocator.h"
-#include "tests/NEON/Accessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/BorderModeDataset.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/ConvolutionFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-/** Tolerance value for comparing reference's output against implementation
- *
- * This is due to the fact that Neon target performs multiplication with reciprocal of scale,
- * while reference performs direct division with scale.
- */
-constexpr AbsoluteTolerance<uint8_t> tolerance_u8(1);
-constexpr AbsoluteTolerance<int16_t> tolerance_s16(1);
-} // namespace
-
-TEST_SUITE(NEON)
-TEST_SUITE(CustomConvolution)
-TEST_SUITE(Square3x3)
-template <typename T>
-using NEConvolutionFixture = ConvolutionSquareValidationFixture<Tensor, Accessor, NEConvolution3x3, T>;
-
-TEST_SUITE(U8)
-FIXTURE_DATA_TEST_CASE(RunLarge, NEConvolutionFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()),
- framework::dataset::make("DataType",
- DataType::U8)),
- datasets::BorderModes()),
- framework::dataset::make("filter_size", { 3 })))
-{
- // Validate output
- validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_u8);
-}
-TEST_SUITE_END() // U8
-
-TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunLarge, NEConvolutionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()),
- framework::dataset::make("DataType",
- DataType::S16)),
- datasets::BorderModes()),
- framework::dataset::make("filter_size", { 3 })))
-{
- // Validate output
- validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_s16);
-}
-TEST_SUITE_END() // S16
-TEST_SUITE_END() // Square3x3
-
-TEST_SUITE(Square5x5)
-template <typename T>
-using NEConvolutionFixture = ConvolutionSquareValidationFixture<Tensor, Accessor, NEConvolution5x5, T>;
-
-TEST_SUITE(U8)
-FIXTURE_DATA_TEST_CASE(RunLarge, NEConvolutionFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()),
- framework::dataset::make("DataType",
- DataType::U8)),
- datasets::BorderModes()),
- framework::dataset::make("filter_size", { 5 })))
-{
- // Validate output
- validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_u8);
-}
-TEST_SUITE_END() // U8
-
-TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunLarge, NEConvolutionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()),
- framework::dataset::make("DataType",
- DataType::S16)),
- datasets::BorderModes()),
- framework::dataset::make("filter_size", { 5 })))
-{
- // Validate output
- validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_s16);
-}
-TEST_SUITE_END() // S16
-TEST_SUITE_END() // Square5x5
-
-TEST_SUITE(Square7x7)
-template <typename T>
-using NEConvolutionFixture = ConvolutionSquareValidationFixture<Tensor, Accessor, NEConvolution7x7, T>;
-
-TEST_SUITE(U8)
-FIXTURE_DATA_TEST_CASE(RunLarge, NEConvolutionFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()),
- framework::dataset::make("DataType",
- DataType::U8)),
- datasets::BorderModes()),
- framework::dataset::make("filter_size", { 7 })))
-{
- // Validate output
- validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_u8);
-}
-TEST_SUITE_END() // U8
-
-TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunLarge, NEConvolutionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()),
- framework::dataset::make("DataType",
- DataType::S16)),
- datasets::BorderModes()),
- framework::dataset::make("filter_size", { 7 })))
-{
- // Validate output
- validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_s16);
-}
-TEST_SUITE_END() // S16
-TEST_SUITE_END() // Square7x7
-
-TEST_SUITE(Square9x9)
-template <typename T>
-using NEConvolutionFixture = ConvolutionSquareValidationFixture<Tensor, Accessor, NEConvolution9x9, T>;
-
-TEST_SUITE(U8)
-FIXTURE_DATA_TEST_CASE(RunLarge, NEConvolutionFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()),
- framework::dataset::make("DataType",
- DataType::U8)),
- datasets::BorderModes()),
- framework::dataset::make("filter_size", { 9 })))
-{
- // Validate output
- validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_u8);
-}
-TEST_SUITE_END() // U8
-
-TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunLarge, NEConvolutionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()),
- framework::dataset::make("DataType",
- DataType::S16)),
- datasets::BorderModes()),
- framework::dataset::make("filter_size", { 9 })))
-{
- // Validate output
- validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_s16);
-}
-TEST_SUITE_END() // S16
-TEST_SUITE_END() // Square9x9
-
-TEST_SUITE(Rectangle)
-template <typename T>
-using NEConvolutionFixture = ConvolutionRectangleValidationFixture<Tensor, Accessor, NEConvolutionRectangle, T>;
-
-TEST_SUITE(U8)
-FIXTURE_DATA_TEST_CASE(RunLarge, NEConvolutionFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()),
- framework::dataset::make("DataType",
- DataType::U8)),
- datasets::BorderModes()),
- framework::dataset::make("filter_width", { 3, 5, 7, 9 })),
- framework::dataset::make("filter_height", { 3, 5, 7, 9 })))
-{
- // Validate output
- validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_u8);
-}
-TEST_SUITE_END() // U8
-
-TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunLarge, NEConvolutionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()),
- framework::dataset::make("DataType",
- DataType::S16)),
- datasets::BorderModes()),
- framework::dataset::make("filter_width", { 3, 5, 7, 9 })),
- framework::dataset::make("filter_height", { 3, 5, 7, 9 })))
-{
- // Validate output
- validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_s16);
-}
-TEST_SUITE_END() // S16
-TEST_SUITE_END() // Rectangle
-
-TEST_SUITE(Separable5x5)
-template <typename T>
-using NEConvolutionFixture = ConvolutionSeparableValidationFixture<Tensor, Accessor, NEConvolution5x5, T>;
-
-TEST_SUITE(U8)
-FIXTURE_DATA_TEST_CASE(RunLarge, NEConvolutionFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()),
- framework::dataset::make("DataType",
- DataType::U8)),
- datasets::BorderModes()),
- framework::dataset::make("filter_size", { 5 })))
-{
- // Validate output
- validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_u8);
-}
-TEST_SUITE_END() // U8
-
-TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunLarge, NEConvolutionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()),
- framework::dataset::make("DataType",
- DataType::S16)),
- datasets::BorderModes()),
- framework::dataset::make("filter_size", { 5 })))
-{
- // Validate output
- validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_s16);
-}
-TEST_SUITE_END() // S16
-TEST_SUITE_END() // Separable5x5
-
-TEST_SUITE(Separable7x7)
-template <typename T>
-using NEConvolutionFixture = ConvolutionSeparableValidationFixture<Tensor, Accessor, NEConvolution7x7, T>;
-
-TEST_SUITE(U8)
-FIXTURE_DATA_TEST_CASE(RunLarge, NEConvolutionFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()),
- framework::dataset::make("DataType",
- DataType::U8)),
- datasets::BorderModes()),
- framework::dataset::make("filter_size", { 7 })))
-{
- // Validate output
- validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_u8);
-}
-TEST_SUITE_END() // U8
-
-TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunLarge, NEConvolutionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()),
- framework::dataset::make("DataType",
- DataType::S16)),
- datasets::BorderModes()),
- framework::dataset::make("filter_size", { 7 })))
-{
- // Validate output
- validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_s16);
-}
-TEST_SUITE_END() // S16
-TEST_SUITE_END() // Separable7x7
-
-TEST_SUITE(Separable9x9)
-template <typename T>
-using NEConvolutionFixture = ConvolutionSeparableValidationFixture<Tensor, Accessor, NEConvolution9x9, T>;
-
-TEST_SUITE(U8)
-FIXTURE_DATA_TEST_CASE(RunLarge, NEConvolutionFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()),
- framework::dataset::make("DataType",
- DataType::U8)),
- datasets::BorderModes()),
- framework::dataset::make("filter_size", { 9 })))
-{
- // Validate output
- validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_u8);
-}
-TEST_SUITE_END() // U8
-
-TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunLarge, NEConvolutionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()),
- framework::dataset::make("DataType",
- DataType::S16)),
- datasets::BorderModes()),
- framework::dataset::make("filter_size", { 9 })))
-{
- // Validate output
- validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_s16);
-}
-TEST_SUITE_END() // S16
-TEST_SUITE_END() // Separable9x9
-
-TEST_SUITE_END() // CustomConvolution
-TEST_SUITE_END() // Neon
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/WarpPerspective.cpp b/tests/validation/NEON/Remap.cpp
index dd05059bed..3c02f8eece 100644
--- a/tests/validation/CL/WarpPerspective.cpp
+++ b/tests/validation/NEON/Remap.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -22,10 +22,10 @@
* SOFTWARE.
*/
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/functions/CLWarpPerspective.h"
+#include "arm_compute/runtime/NEON/functions/NERemap.h"
#include "arm_compute/runtime/Tensor.h"
#include "arm_compute/runtime/TensorAllocator.h"
-#include "tests/CL/CLAccessor.h"
+#include "tests/NEON/Accessor.h"
#include "tests/PaddingCalculator.h"
#include "tests/datasets/BorderModeDataset.h"
#include "tests/datasets/ShapeDatasets.h"
@@ -33,7 +33,7 @@
#include "tests/framework/Macros.h"
#include "tests/framework/datasets/Datasets.h"
#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/WarpPerspectiveFixture.h"
+#include "tests/validation/fixtures/RemapFixture.h"
namespace arm_compute
{
@@ -43,31 +43,33 @@ namespace validation
{
namespace
{
-constexpr AbsoluteTolerance<uint8_t> tolerance_value(1);
-constexpr float tolerance_number = 0.2f;
+constexpr AbsoluteTolerance<uint8_t> tolerance_value(0);
+constexpr float tolerance_number = 0.f;
} // namespace
-TEST_SUITE(CL)
-TEST_SUITE(WarpPerspective)
+TEST_SUITE(NEON)
+TEST_SUITE(Remap)
template <typename T>
-using CLWarpPerspectiveFixture = WarpPerspectiveValidationFixture<CLTensor, CLAccessor, CLWarpPerspective, T>;
+using NERemapFixture = RemapValidationFixture<Tensor, Accessor, NERemap, T>;
-FIXTURE_DATA_TEST_CASE(RunSmall, CLWarpPerspectiveFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
- DataType::U8)),
- framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
- datasets::BorderModes()))
+FIXTURE_DATA_TEST_CASE(RunSmall, NERemapFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
+ framework::dataset::make("DataType",
+ DataType::U8)),
+ framework::dataset::make("BorderModes", { BorderMode::UNDEFINED, BorderMode::CONSTANT })))
{
- validate(CLAccessor(_target), _reference, _valid_mask, tolerance_value, tolerance_number);
+ // Validate output
+ validate(Accessor(_target), _reference, _valid_mask, tolerance_value, tolerance_number);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLWarpPerspectiveFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
- DataType::U8)),
- framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
- datasets::BorderModes()))
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NERemapFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
+ framework::dataset::make("DataType",
+ DataType::U8)),
+ framework::dataset::make("BorderModes", { BorderMode::UNDEFINED, BorderMode::CONSTANT })))
{
- validate(CLAccessor(_target), _reference, _valid_mask, tolerance_value, tolerance_number);
+ // Validate output
+ validate(Accessor(_target), _reference, _valid_mask, tolerance_value, tolerance_number);
}
-
TEST_SUITE_END()
TEST_SUITE_END()
} // namespace validation
diff --git a/tests/validation/fixtures/AbsoluteDifferenceFixture.h b/tests/validation/fixtures/AbsoluteDifferenceFixture.h
deleted file mode 100644
index 46118c9323..0000000000
--- a/tests/validation/fixtures/AbsoluteDifferenceFixture.h
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * Copyright (c) 2017 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_TEST_ABSOLUTE_DIFFERENCE_FIXTURE
-#define ARM_COMPUTE_TEST_ABSOLUTE_DIFFERENCE_FIXTURE
-
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Types.h"
-#include "tests/AssetsLibrary.h"
-#include "tests/Globals.h"
-#include "tests/IAccessor.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Fixture.h"
-#include "tests/validation/Helpers.h"
-#include "tests/validation/reference/AbsoluteDifference.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class AbsoluteDifferenceValidationFixture : public framework::Fixture
-{
-public:
- template <typename...>
- void setup(TensorShape shape, DataType data_type0, DataType data_type1, DataType output_data_type)
- {
- _target = compute_target(shape, data_type0, data_type1, output_data_type);
- _reference = compute_reference(shape, data_type0, data_type1, output_data_type);
- }
-
-protected:
- template <typename U>
- void fill(U &&tensor, int i)
- {
- library->fill_tensor_uniform(tensor, i);
- }
-
- TensorType compute_target(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type)
- {
- // Create tensors
- TensorType ref_src1 = create_tensor<TensorType>(shape, data_type0, 1);
- TensorType ref_src2 = create_tensor<TensorType>(shape, data_type1, 1);
- TensorType dst = create_tensor<TensorType>(shape, output_data_type, 1);
-
- // Create and configure function
- FunctionType abs_diff;
- abs_diff.configure(&ref_src1, &ref_src2, &dst);
-
- ARM_COMPUTE_EXPECT(ref_src1.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(ref_src2.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Allocate tensors
- ref_src1.allocator()->allocate();
- ref_src2.allocator()->allocate();
- dst.allocator()->allocate();
-
- ARM_COMPUTE_EXPECT(!ref_src1.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!ref_src2.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Fill tensors
- fill(AccessorType(ref_src1), 0);
- fill(AccessorType(ref_src2), 1);
-
- // Compute function
- abs_diff.run();
-
- return dst;
- }
-
- SimpleTensor<T> compute_reference(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type)
- {
- // Create reference
- SimpleTensor<T> ref_src1{ shape, data_type0, 1 };
- SimpleTensor<T> ref_src2{ shape, data_type1, 1 };
-
- // Fill reference
- fill(ref_src1, 0);
- fill(ref_src2, 1);
-
- return reference::absolute_difference<T>(ref_src1, ref_src2, output_data_type);
- }
-
- TensorType _target{};
- SimpleTensor<T> _reference{};
-};
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_ABSOLUTE_DIFFERENCE_FIXTURE */
diff --git a/tests/validation/fixtures/AccumulateFixture.h b/tests/validation/fixtures/AccumulateFixture.h
deleted file mode 100644
index 7cea29c15d..0000000000
--- a/tests/validation/fixtures/AccumulateFixture.h
+++ /dev/null
@@ -1,193 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_TEST_ACCUMULATE_FIXTURE
-#define ARM_COMPUTE_TEST_ACCUMULATE_FIXTURE
-
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Types.h"
-#include "tests/AssetsLibrary.h"
-#include "tests/Globals.h"
-#include "tests/IAccessor.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Fixture.h"
-#include "tests/validation/Helpers.h"
-#include "tests/validation/reference/Accumulate.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T1, typename T2>
-class AccumulateBaseValidationFixture : public framework::Fixture
-{
-public:
- template <typename...>
- void setup(TensorShape shape, DataType data_type, DataType output_data_type)
- {
- _target = compute_target(shape, data_type, output_data_type);
- _reference = compute_reference(shape, data_type, output_data_type);
- }
-
-protected:
- template <typename U, typename D>
- void fill(U &&tensor, int i, D max)
- {
- library->fill_tensor_uniform(tensor, i, static_cast<D>(0), max);
- }
-
- TensorType compute_target(const TensorShape &shape, DataType data_type, DataType output_data_type)
- {
- // Create tensors
- TensorType ref_src = create_tensor<TensorType>(shape, data_type);
- TensorType dst = create_tensor<TensorType>(shape, output_data_type);
-
- // Create and configure function
- FunctionType accum;
- accum_conf(accum, ref_src, dst);
-
- ARM_COMPUTE_EXPECT(ref_src.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Allocate tensors
- ref_src.allocator()->allocate();
- dst.allocator()->allocate();
-
- ARM_COMPUTE_EXPECT(!ref_src.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- const T1 max = std::numeric_limits<T1>::max();
-
- // Fill tensors
- fill(AccessorType(ref_src), 0, max);
- fill(AccessorType(dst), 1, static_cast<T2>(max));
-
- // Compute function
- accum.run();
-
- return dst;
- }
-
- SimpleTensor<T2> compute_reference(const TensorShape &shape, DataType data_type, DataType output_data_type)
- {
- // Create reference
- SimpleTensor<T1> ref_src{ shape, data_type };
-
- const T1 max = std::numeric_limits<T1>::max();
-
- // Fill reference
- fill(ref_src, 0, max);
-
- return accum_ref(ref_src, output_data_type);
- }
-
- virtual void accum_conf(FunctionType &func, const TensorType &input, TensorType &accum) = 0;
-
- virtual SimpleTensor<T2> accum_ref(const SimpleTensor<T1> &input, DataType output_data_type) = 0;
-
- TensorType _target{};
- SimpleTensor<T2> _reference{};
-};
-
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T1, typename T2>
-class AccumulateValidationFixture : public AccumulateBaseValidationFixture<TensorType, AccessorType, FunctionType, T1, T2>
-{
-public:
- template <typename...>
- void setup(TensorShape shape, DataType data_type, DataType output_data_type)
- {
- AccumulateBaseValidationFixture<TensorType, AccessorType, FunctionType, T1, T2>::setup(shape, data_type, output_data_type);
- }
-
- virtual void accum_conf(FunctionType &func, const TensorType &input, TensorType &accum) override
- {
- func.configure(&input, &accum);
- }
-
- virtual SimpleTensor<T2> accum_ref(const SimpleTensor<T1> &input, DataType output_data_type) override
- {
- return reference::accumulate<T1, T2>(input, output_data_type);
- }
-};
-
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T1, typename T2>
-class AccumulateWeightedValidationFixture : public AccumulateBaseValidationFixture<TensorType, AccessorType, FunctionType, T1, T2>
-{
-public:
- template <typename...>
- void setup(TensorShape shape, DataType data_type, DataType output_data_type)
- {
- std::mt19937 gen(library->seed());
- std::uniform_real_distribution<float> float_dist(0, 1);
-
- _alpha = float_dist(gen);
-
- AccumulateBaseValidationFixture<TensorType, AccessorType, FunctionType, T1, T2>::setup(shape, data_type, output_data_type);
- }
-
- virtual void accum_conf(FunctionType &func, const TensorType &input, TensorType &accum) override
- {
- func.configure(&input, _alpha, &accum);
- }
-
- virtual SimpleTensor<T2> accum_ref(const SimpleTensor<T1> &input, DataType output_data_type) override
- {
- return reference::accumulate_weighted<T1, T2>(input, _alpha, output_data_type);
- }
-
- float _alpha{ 0.f };
-};
-
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T1, typename T2>
-class AccumulateSquaredValidationFixture : public AccumulateBaseValidationFixture<TensorType, AccessorType, FunctionType, T1, T2>
-{
-public:
- template <typename...>
- void setup(TensorShape shape, DataType data_type, DataType output_data_type)
- {
- std::mt19937 gen(library->seed());
- std::uniform_int_distribution<uint32_t> int_dist(0, 15);
-
- _shift = int_dist(gen);
-
- AccumulateBaseValidationFixture<TensorType, AccessorType, FunctionType, T1, T2>::setup(shape, data_type, output_data_type);
- }
-
- virtual void accum_conf(FunctionType &func, const TensorType &input, TensorType &accum) override
- {
- func.configure(&input, _shift, &accum);
- }
-
- virtual SimpleTensor<T2> accum_ref(const SimpleTensor<T1> &input, DataType output_data_type) override
- {
- return reference::accumulate_squared<T1, T2>(input, _shift, output_data_type);
- }
-
- uint32_t _shift{ 0U };
-};
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_ACCUMULATE_FIXTURE */
diff --git a/tests/validation/fixtures/Box3x3Fixture.h b/tests/validation/fixtures/Box3x3Fixture.h
deleted file mode 100644
index 8caeec8400..0000000000
--- a/tests/validation/fixtures/Box3x3Fixture.h
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Copyright (c) 2017 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_TEST_BOX3X3_FIXTURE
-#define ARM_COMPUTE_TEST_BOX3X3_FIXTURE
-
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Types.h"
-#include "tests/AssetsLibrary.h"
-#include "tests/Globals.h"
-#include "tests/IAccessor.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Fixture.h"
-#include "tests/validation/reference/Box3x3.h"
-
-#include <random>
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class Box3x3ValidationFixture : public framework::Fixture
-{
-public:
- template <typename...>
- void setup(TensorShape shape, DataType data_type, BorderMode border_mode)
- {
- std::mt19937 gen(library->seed());
- std::uniform_int_distribution<uint8_t> distribution(0, 255);
- const uint8_t constant_border_value = distribution(gen);
-
- _border_mode = border_mode;
- _target = compute_target(shape, data_type, border_mode, constant_border_value);
- _reference = compute_reference(shape, data_type, border_mode, constant_border_value);
- }
-
-protected:
- template <typename U>
- void fill(U &&tensor)
- {
- library->fill_tensor_uniform(tensor, 0);
- }
-
- TensorType compute_target(const TensorShape &shape, DataType data_type, BorderMode border_mode, uint8_t constant_border_value)
- {
- // Create tensors
- TensorType src = create_tensor<TensorType>(shape, data_type);
- TensorType dst = create_tensor<TensorType>(shape, data_type);
-
- // Create and configure function
- FunctionType box3x3;
- box3x3.configure(&src, &dst, border_mode, constant_border_value);
-
- ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Allocate tensors
- src.allocator()->allocate();
- dst.allocator()->allocate();
-
- ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Fill tensors
- fill(AccessorType(src));
-
- // Compute function
- box3x3.run();
-
- return dst;
- }
-
- SimpleTensor<T> compute_reference(const TensorShape &shape, DataType data_type, BorderMode border_mode, uint8_t constant_border_value)
- {
- ARM_COMPUTE_ERROR_ON(data_type != DataType::U8);
-
- // Create reference
- SimpleTensor<T> src{ shape, data_type };
-
- // Fill reference
- fill(src);
-
- // Compute reference
- return reference::box3x3<T>(src, border_mode, constant_border_value);
- }
-
- BorderMode _border_mode{};
- TensorType _target{};
- SimpleTensor<T> _reference{};
-};
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_BOX3X3_FIXTURE */
diff --git a/tests/validation/fixtures/CannyEdgeFixture.h b/tests/validation/fixtures/CannyEdgeFixture.h
deleted file mode 100644
index 8e82e6d2f0..0000000000
--- a/tests/validation/fixtures/CannyEdgeFixture.h
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright (c) 2017-2018 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_TEST_CANNY_EDGE_FIXTURE
-#define ARM_COMPUTE_TEST_CANNY_EDGE_FIXTURE
-
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Types.h"
-#include "tests/AssetsLibrary.h"
-#include "tests/Globals.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Fixture.h"
-#include "tests/validation/Helpers.h"
-#include "tests/validation/reference/CannyEdgeDetector.h"
-
-namespace arm_compute
-{
-class CLCannyEdge;
-class NECannyEdge;
-
-namespace test
-{
-namespace validation
-{
-template <typename TensorType, typename AccessorType, typename ArrayType, typename FunctionType, typename T>
-class CannyEdgeValidationFixture : public framework::Fixture
-{
-public:
- template <typename...>
- void setup(std::string image, int gradient_size, MagnitudeType norm_type, BorderMode border_mode, Format format)
- {
- CannyEdgeParameters params = canny_edge_parameters();
-
- _target = compute_target(image, gradient_size, norm_type, border_mode, format, params);
- _reference = compute_reference(image, gradient_size, norm_type, border_mode, format, params);
- }
-
-protected:
- template <typename U>
- void fill(U &&tensor, RawTensor raw)
- {
- library->fill(tensor, raw);
- }
-
- TensorType compute_target(const std::string &image, int gradient_size, MagnitudeType norm_type, BorderMode border_mode, Format format, const CannyEdgeParameters &params)
- {
- // Load the image (cached by the library if loaded before)
- const RawTensor &raw = library->get(image, format);
-
- // Create tensors
- TensorType src = create_tensor<TensorType>(raw.shape(), format);
- TensorType dst = create_tensor<TensorType>(raw.shape(), format);
- src.info()->set_format(format);
- dst.info()->set_format(format);
-
- // Create Canny edge configure function
- FunctionType canny_edge;
- canny_edge.configure(&src, &dst, params.upper_thresh, params.lower_thresh, gradient_size, static_cast<int>(norm_type) + 1, border_mode, params.constant_border_value);
-
- ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Allocate tensors
- src.allocator()->allocate();
- dst.allocator()->allocate();
-
- ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Fill tensors
- fill(AccessorType(src), raw);
-
- // Compute function
- canny_edge.run();
-
- return dst;
- }
-
- SimpleTensor<T> compute_reference(const std::string &image, int gradient_size, MagnitudeType norm_type, BorderMode border_mode, Format format, const CannyEdgeParameters &params)
- {
- ARM_COMPUTE_ERROR_ON(format != Format::U8);
-
- // Load the image (cached by the library if loaded before)
- const RawTensor &raw = library->get(image, format);
-
- // Create reference
- SimpleTensor<T> src{ raw.shape(), format };
-
- // Fill reference
- fill(src, raw);
-
- return reference::canny_edge_detector<T>(src, params.upper_thresh, params.lower_thresh, gradient_size, norm_type, border_mode, params.constant_border_value);
- }
-
- TensorType _target{};
- SimpleTensor<T> _reference{};
-};
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_CANNY_EDGE_FIXTURE */
diff --git a/tests/validation/fixtures/ChannelCombineFixture.h b/tests/validation/fixtures/ChannelCombineFixture.h
deleted file mode 100644
index f0d927a992..0000000000
--- a/tests/validation/fixtures/ChannelCombineFixture.h
+++ /dev/null
@@ -1,266 +0,0 @@
-/*
- * Copyright (c) 2017-2018 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_TEST_CHANNEL_COMBINE_FIXTURE
-#define ARM_COMPUTE_TEST_CHANNEL_COMBINE_FIXTURE
-
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Types.h"
-#include "tests/AssetsLibrary.h"
-#include "tests/Globals.h"
-#include "tests/IAccessor.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Fixture.h"
-#include "tests/validation/Helpers.h"
-#include "tests/validation/reference/ChannelCombine.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-template <typename TensorType>
-inline std::vector<TensorType> create_tensor_planes(const TensorShape &shape, Format format)
-{
- TensorShape image_shape = adjust_odd_shape(shape, format);
- TensorInfo info(image_shape, Format::U8);
-
- std::vector<TensorType> tensor_planes;
-
- switch(format)
- {
- case Format::RGB888:
- case Format::RGBA8888:
- case Format::YUV444:
- {
- tensor_planes.resize(3);
-
- if(format == Format::RGBA8888)
- {
- tensor_planes.resize(4);
- }
-
- for(unsigned int plane_idx = 0; plane_idx < tensor_planes.size(); ++plane_idx)
- {
- tensor_planes[plane_idx].allocator()->init(info);
- }
-
- break;
- }
- case Format::YUYV422:
- case Format::UYVY422:
- {
- const TensorShape uv_shape = calculate_subsampled_shape(image_shape, format);
- const TensorInfo info_hor2(uv_shape, Format::U8);
-
- tensor_planes.resize(3);
-
- tensor_planes[0].allocator()->init(info);
- tensor_planes[1].allocator()->init(info_hor2);
- tensor_planes[2].allocator()->init(info_hor2);
- break;
- }
- case Format::NV12:
- case Format::NV21:
- case Format::IYUV:
- {
- const TensorShape sub2_shape = calculate_subsampled_shape(image_shape, format);
- const TensorInfo info_sub2(sub2_shape, Format::U8);
-
- tensor_planes.resize(3);
-
- tensor_planes[0].allocator()->init(info);
- tensor_planes[1].allocator()->init(info_sub2);
- tensor_planes[2].allocator()->init(info_sub2);
- break;
- }
- default:
- ARM_COMPUTE_ERROR("Not supported");
- break;
- }
-
- return tensor_planes;
-}
-} // namespace
-
-template <typename MultiImageType, typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class ChannelCombineValidationFixture : public framework::Fixture
-{
-public:
- template <typename...>
- void setup(TensorShape shape, Format format)
- {
- _num_planes = num_planes_from_format(format);
- _target = compute_target(shape, format);
- _reference = compute_reference(shape, format);
- }
-
-protected:
- template <typename U>
- void fill(U &&tensor, int i)
- {
- library->fill_tensor_uniform(tensor, i);
- }
-
- template <typename U>
- std::vector<SimpleTensor<U>> create_tensor_planes_reference(const TensorShape &shape, Format format)
- {
- std::vector<SimpleTensor<U>> tensor_planes;
-
- TensorShape image_shape = adjust_odd_shape(shape, format);
-
- switch(format)
- {
- case Format::RGB888:
- case Format::RGBA8888:
- case Format::YUV444:
- {
- if(format == Format::RGBA8888)
- {
- tensor_planes.emplace_back(image_shape, Format::U8);
- }
-
- tensor_planes.emplace_back(image_shape, Format::U8);
- tensor_planes.emplace_back(image_shape, Format::U8);
- tensor_planes.emplace_back(image_shape, Format::U8);
- break;
- }
- case Format::YUYV422:
- case Format::UYVY422:
- {
- const TensorShape hor2_shape = calculate_subsampled_shape(image_shape, format);
-
- tensor_planes.emplace_back(image_shape, Format::U8);
- tensor_planes.emplace_back(hor2_shape, Format::U8);
- tensor_planes.emplace_back(hor2_shape, Format::U8);
- break;
- }
- case Format::NV12:
- case Format::NV21:
- case Format::IYUV:
- {
- const TensorShape shape_sub2 = calculate_subsampled_shape(image_shape, format);
-
- tensor_planes.emplace_back(image_shape, Format::U8);
- tensor_planes.emplace_back(shape_sub2, Format::U8);
- tensor_planes.emplace_back(shape_sub2, Format::U8);
- break;
- }
- default:
- ARM_COMPUTE_ERROR("Not supported");
- break;
- }
-
- return tensor_planes;
- }
-
- MultiImageType compute_target(const TensorShape &shape, Format format)
- {
- // Create tensors
- std::vector<TensorType> ref_src = create_tensor_planes<TensorType>(shape, format);
- MultiImageType dst = create_multi_image<MultiImageType>(shape, format);
-
- // Create and configure function
- FunctionType channel_combine;
-
- if(1 == _num_planes)
- {
- const TensorType *tensor_extra = ((Format::RGBA8888 == format) ? &ref_src[3] : nullptr);
- TensorType *tensor_dst = dynamic_cast<TensorType *>(dst.plane(0));
-
- channel_combine.configure(&ref_src[0], &ref_src[1], &ref_src[2], tensor_extra, tensor_dst);
- }
- else
- {
- channel_combine.configure(&ref_src[0], &ref_src[1], &ref_src[2], &dst);
- }
-
- for(unsigned int plane_idx = 0; plane_idx < _num_planes; ++plane_idx)
- {
- const TensorType *dst_plane = static_cast<const TensorType *>(dst.plane(plane_idx));
-
- ARM_COMPUTE_EXPECT(dst_plane->info()->is_resizable(), framework::LogLevel::ERRORS);
- }
-
- for(unsigned int plane_idx = 0; plane_idx < ref_src.size(); ++plane_idx)
- {
- ARM_COMPUTE_EXPECT(ref_src[plane_idx].info()->is_resizable(), framework::LogLevel::ERRORS);
- }
-
- // Allocate tensors
- dst.allocate();
-
- for(unsigned int plane_idx = 0; plane_idx < ref_src.size(); ++plane_idx)
- {
- ref_src[plane_idx].allocator()->allocate();
- }
-
- for(unsigned int plane_idx = 0; plane_idx < _num_planes; ++plane_idx)
- {
- const TensorType *dst_plane = static_cast<const TensorType *>(dst.plane(plane_idx));
-
- ARM_COMPUTE_EXPECT(!dst_plane->info()->is_resizable(), framework::LogLevel::ERRORS);
- }
-
- for(unsigned int plane_idx = 0; plane_idx < ref_src.size(); ++plane_idx)
- {
- ARM_COMPUTE_EXPECT(!ref_src[plane_idx].info()->is_resizable(), framework::LogLevel::ERRORS);
- }
-
- // Fill tensor planes
- for(unsigned int plane_idx = 0; plane_idx < ref_src.size(); ++plane_idx)
- {
- fill(AccessorType(ref_src[plane_idx]), plane_idx);
- }
-
- // Compute function
- channel_combine.run();
-
- return dst;
- }
-
- std::vector<SimpleTensor<T>> compute_reference(const TensorShape &shape, Format format)
- {
- // Create reference
- std::vector<SimpleTensor<T>> ref_src = create_tensor_planes_reference<T>(shape, format);
-
- // Fill references
- for(unsigned int plane_idx = 0; plane_idx < ref_src.size(); ++plane_idx)
- {
- fill(ref_src[plane_idx], plane_idx);
- }
-
- return reference::channel_combine<T>(shape, ref_src, format);
- }
-
- unsigned int _num_planes{};
- MultiImageType _target{};
- std::vector<SimpleTensor<T>> _reference{};
-};
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_CHANNEL_COMBINE_FIXTURE */
diff --git a/tests/validation/fixtures/ChannelExtractFixture.h b/tests/validation/fixtures/ChannelExtractFixture.h
deleted file mode 100644
index 2f5694fcb1..0000000000
--- a/tests/validation/fixtures/ChannelExtractFixture.h
+++ /dev/null
@@ -1,192 +0,0 @@
-/*
- * Copyright (c) 2017-2018 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_TEST_CHANNEL_EXTRACT_FIXTURE
-#define ARM_COMPUTE_TEST_CHANNEL_EXTRACT_FIXTURE
-
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Types.h"
-#include "tests/AssetsLibrary.h"
-#include "tests/Globals.h"
-#include "tests/IAccessor.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Fixture.h"
-#include "tests/validation/Helpers.h"
-#include "tests/validation/reference/ChannelExtract.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-template <typename MultiImageType, typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class ChannelExtractValidationFixture : public framework::Fixture
-{
-public:
- template <typename...>
- void setup(TensorShape shape, Format format, Channel channel)
- {
- shape = adjust_odd_shape(shape, format);
-
- _target = compute_target(shape, format, channel);
- _reference = compute_reference(shape, format, channel);
- }
-
-protected:
- template <typename U>
- void fill(U &&tensor, int i)
- {
- library->fill_tensor_uniform(tensor, i);
- }
-
- std::vector<SimpleTensor<T>> create_tensor_planes_reference(const TensorShape &shape, Format format)
- {
- TensorShape input = adjust_odd_shape(shape, format);
-
- std::vector<SimpleTensor<T>> tensor_planes;
-
- switch(format)
- {
- case Format::RGB888:
- case Format::RGBA8888:
- case Format::YUYV422:
- case Format::UYVY422:
- {
- tensor_planes.emplace_back(input, format);
- break;
- }
- case Format::NV12:
- case Format::NV21:
- {
- const TensorShape shape_uv88 = calculate_subsampled_shape(shape, Format::UV88);
-
- tensor_planes.emplace_back(input, Format::U8);
- tensor_planes.emplace_back(shape_uv88, Format::UV88);
- break;
- }
- case Format::IYUV:
- {
- const TensorShape shape_sub2 = calculate_subsampled_shape(shape, Format::IYUV);
-
- tensor_planes.emplace_back(input, Format::U8);
- tensor_planes.emplace_back(shape_sub2, Format::U8);
- tensor_planes.emplace_back(shape_sub2, Format::U8);
- break;
- }
- case Format::YUV444:
- tensor_planes.emplace_back(input, Format::U8);
- tensor_planes.emplace_back(input, Format::U8);
- tensor_planes.emplace_back(input, Format::U8);
- break;
- default:
- ARM_COMPUTE_ERROR("Not supported");
- break;
- }
-
- return tensor_planes;
- }
-
- TensorType compute_target(const TensorShape &shape, Format format, Channel channel)
- {
- const unsigned int num_planes = num_planes_from_format(format);
-
- TensorShape dst_shape = calculate_subsampled_shape(shape, format, channel);
-
- // Create tensors
- MultiImageType ref_src = create_multi_image<MultiImageType>(shape, format);
- TensorType dst = create_tensor<TensorType>(dst_shape, Format::U8);
-
- // Create and configure function
- FunctionType channel_extract;
-
- if(1U == num_planes)
- {
- const TensorType *plane_src = static_cast<TensorType *>(ref_src.plane(0));
-
- channel_extract.configure(plane_src, channel, &dst);
- }
- else
- {
- channel_extract.configure(&ref_src, channel, &dst);
- }
-
- for(unsigned int plane_idx = 0; plane_idx < num_planes; ++plane_idx)
- {
- const TensorType *src_plane = static_cast<const TensorType *>(ref_src.plane(plane_idx));
-
- ARM_COMPUTE_EXPECT(src_plane->info()->is_resizable(), framework::LogLevel::ERRORS);
- }
-
- ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Allocate tensors
- ref_src.allocate();
- dst.allocator()->allocate();
-
- for(unsigned int plane_idx = 0; plane_idx < num_planes; ++plane_idx)
- {
- const TensorType *src_plane = static_cast<const TensorType *>(ref_src.plane(plane_idx));
-
- ARM_COMPUTE_EXPECT(!src_plane->info()->is_resizable(), framework::LogLevel::ERRORS);
- }
-
- ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Fill tensor planes
- for(unsigned int plane_idx = 0; plane_idx < num_planes; ++plane_idx)
- {
- TensorType *src_plane = static_cast<TensorType *>(ref_src.plane(plane_idx));
-
- fill(AccessorType(*src_plane), plane_idx);
- }
-
- // Compute function
- channel_extract.run();
-
- return dst;
- }
-
- SimpleTensor<T> compute_reference(const TensorShape &shape, Format format, Channel channel)
- {
- const unsigned int num_planes = num_planes_from_format(format);
-
- // Create reference
- std::vector<SimpleTensor<T>> ref_src = create_tensor_planes_reference(shape, format);
-
- // Fill references
- for(unsigned int plane_idx = 0; plane_idx < num_planes; ++plane_idx)
- {
- fill(ref_src[plane_idx], plane_idx);
- }
-
- return reference::channel_extract<T>(shape, ref_src, format, channel);
- }
-
- TensorType _target{};
- SimpleTensor<T> _reference{};
-};
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_CHANNEL_EXTRACT_FIXTURE */
diff --git a/tests/validation/fixtures/ColorConvertFixture.h b/tests/validation/fixtures/ColorConvertFixture.h
deleted file mode 100644
index a5ed5548d5..0000000000
--- a/tests/validation/fixtures/ColorConvertFixture.h
+++ /dev/null
@@ -1,218 +0,0 @@
-/*
- * Copyright (c) 2017-2018 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_TEST_COLOR_CONVERT_FIXTURE
-#define ARM_COMPUTE_TEST_COLOR_CONVERT_FIXTURE
-
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Types.h"
-#include "tests/AssetsLibrary.h"
-#include "tests/Globals.h"
-#include "tests/IAccessor.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Fixture.h"
-#include "tests/validation/Helpers.h"
-#include "tests/validation/reference/ColorConvert.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-}
-template <typename MultiImageType, typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class ColorConvertValidationFixture : public framework::Fixture
-{
-public:
- template <typename...>
- void setup(TensorShape shape, Format src_format, Format dst_format)
- {
- shape = adjust_odd_shape(shape, src_format);
- shape = adjust_odd_shape(shape, dst_format);
-
- _target = compute_target(shape, src_format, dst_format);
- _reference = compute_reference(shape, src_format, dst_format);
- }
-
-protected:
- template <typename U>
- void fill(U &&tensor, int i)
- {
- library->fill_tensor_uniform(tensor, i);
- }
-
- std::vector<SimpleTensor<T>> create_tensor_planes_reference(const TensorShape &shape, Format format)
- {
- std::vector<SimpleTensor<T>> tensor_planes;
-
- switch(format)
- {
- case Format::RGB888:
- case Format::RGBA8888:
- case Format::YUYV422:
- case Format::UYVY422:
- {
- tensor_planes.emplace_back(shape, format);
- break;
- }
- case Format::NV12:
- case Format::NV21:
- {
- const TensorShape shape_uv88 = calculate_subsampled_shape(shape, Format::UV88);
-
- tensor_planes.emplace_back(shape, Format::U8);
- tensor_planes.emplace_back(shape_uv88, Format::UV88);
- break;
- }
- case Format::IYUV:
- {
- const TensorShape shape_sub2 = calculate_subsampled_shape(shape, Format::IYUV);
-
- tensor_planes.emplace_back(shape, Format::U8);
- tensor_planes.emplace_back(shape_sub2, Format::U8);
- tensor_planes.emplace_back(shape_sub2, Format::U8);
- break;
- }
- case Format::YUV444:
- {
- tensor_planes.emplace_back(shape, Format::U8);
- tensor_planes.emplace_back(shape, Format::U8);
- tensor_planes.emplace_back(shape, Format::U8);
- break;
- }
- default:
- ARM_COMPUTE_ERROR("Not supported");
- break;
- }
-
- return tensor_planes;
- }
-
- MultiImageType compute_target(const TensorShape &shape, Format src_format, Format dst_format)
- {
- _src_num_planes = num_planes_from_format(src_format);
- _dst_num_planes = num_planes_from_format(dst_format);
-
- // Create tensors
- MultiImageType ref_src = create_multi_image<MultiImageType>(shape, src_format);
- MultiImageType ref_dst = create_multi_image<MultiImageType>(shape, dst_format);
-
- // Create and configure function
- FunctionType color_convert;
-
- if(1U == _src_num_planes)
- {
- const TensorType *plane_src = static_cast<TensorType *>(ref_src.plane(0));
-
- if(1U == _dst_num_planes)
- {
- TensorType *plane_dst = static_cast<TensorType *>(ref_dst.plane(0));
- color_convert.configure(plane_src, plane_dst);
- }
- else
- {
- color_convert.configure(plane_src, &ref_dst);
- }
- }
- else
- {
- if(1U == _dst_num_planes)
- {
- TensorType *plane_dst = static_cast<TensorType *>(ref_dst.plane(0));
- color_convert.configure(&ref_src, plane_dst);
- }
- else
- {
- color_convert.configure(&ref_src, &ref_dst);
- }
- }
-
- for(unsigned int plane_idx = 0; plane_idx < _src_num_planes; ++plane_idx)
- {
- const TensorType *src_plane = static_cast<const TensorType *>(ref_src.plane(plane_idx));
-
- ARM_COMPUTE_EXPECT(src_plane->info()->is_resizable(), framework::LogLevel::ERRORS);
- }
- for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx)
- {
- const TensorType *dst_plane = static_cast<const TensorType *>(ref_dst.plane(plane_idx));
-
- ARM_COMPUTE_EXPECT(dst_plane->info()->is_resizable(), framework::LogLevel::ERRORS);
- }
-
- // Allocate tensors
- ref_src.allocate();
- ref_dst.allocate();
-
- for(unsigned int plane_idx = 0; plane_idx < _src_num_planes; ++plane_idx)
- {
- const TensorType *src_plane = static_cast<const TensorType *>(ref_src.plane(plane_idx));
- ARM_COMPUTE_EXPECT(!src_plane->info()->is_resizable(), framework::LogLevel::ERRORS);
- }
-
- for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx)
- {
- const TensorType *dst_plane = static_cast<const TensorType *>(ref_dst.plane(plane_idx));
- ARM_COMPUTE_EXPECT(!dst_plane->info()->is_resizable(), framework::LogLevel::ERRORS);
- }
-
- // Fill tensor planes
- for(unsigned int plane_idx = 0; plane_idx < _src_num_planes; ++plane_idx)
- {
- TensorType *src_plane = static_cast<TensorType *>(ref_src.plane(plane_idx));
-
- fill(AccessorType(*src_plane), plane_idx);
- }
-
- // Compute function
- color_convert.run();
-
- return ref_dst;
- }
-
- std::vector<SimpleTensor<T>> compute_reference(const TensorShape &shape, Format src_format, Format dst_format)
- {
- // Create reference
- std::vector<SimpleTensor<T>> ref_src = create_tensor_planes_reference(shape, src_format);
-
- // Fill references
- for(unsigned int plane_idx = 0; plane_idx < ref_src.size(); ++plane_idx)
- {
- fill(ref_src[plane_idx], plane_idx);
- }
-
- return reference::color_convert<T>(shape, ref_src, src_format, dst_format);
- }
-
- unsigned int _src_num_planes{};
- unsigned int _dst_num_planes{};
- MultiImageType _target{};
- std::vector<SimpleTensor<T>> _reference{};
-};
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_COLOR_CONVERT_FIXTURE */
diff --git a/tests/validation/fixtures/ConvolutionFixture.h b/tests/validation/fixtures/ConvolutionFixture.h
deleted file mode 100644
index 4692e2faf8..0000000000
--- a/tests/validation/fixtures/ConvolutionFixture.h
+++ /dev/null
@@ -1,235 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_TEST_CONVOLUTION_FIXTURE
-#define ARM_COMPUTE_TEST_CONVOLUTION_FIXTURE
-
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Types.h"
-#include "tests/AssetsLibrary.h"
-#include "tests/Globals.h"
-#include "tests/IAccessor.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Fixture.h"
-#include "tests/validation/reference/Convolution.h"
-
-#include <random>
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class ConvolutionValidationFixture : public framework::Fixture
-{
-protected:
- template <typename...>
- void setup(TensorShape shape, DataType output_data_type, BorderMode border_mode, const unsigned int width, const unsigned int height, const bool is_separable = false)
- {
- std::mt19937 gen(library->seed());
- std::uniform_int_distribution<uint8_t> distribution(0, 255);
- std::uniform_int_distribution<uint8_t> scale_distribution(1, 255);
- const uint8_t constant_border_value = distribution(gen);
-
- // Generate random scale value between 1 and 255.
- const uint32_t scale = scale_distribution(gen);
-
- ARM_COMPUTE_ERROR_ON(3 != width && 5 != width && 7 != width && 9 != width);
- ARM_COMPUTE_ERROR_ON(3 != height && 5 != height && 7 != height && 9 != height);
-
- std::vector<int16_t> conv(width * height);
-
- _width = width;
- _height = height;
-
- if(is_separable)
- {
- init_separable_conv(conv.data(), width, height, library->seed());
- }
- else
- {
- init_conv(conv.data(), width, height, library->seed());
- }
-
- _target = compute_target(shape, output_data_type, conv.data(), scale, border_mode, constant_border_value);
- _reference = compute_reference(shape, output_data_type, conv.data(), scale, border_mode, constant_border_value);
- }
-
- template <typename U>
- void fill(U &&tensor, int i)
- {
- library->fill_tensor_uniform(tensor, i);
- }
-
- SimpleTensor<T> compute_reference(const TensorShape &shape, DataType output_data_type, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value)
- {
- // Create reference
- SimpleTensor<uint8_t> src{ shape, DataType::U8 };
-
- // Fill reference
- fill(src, 0);
-
- // Compute reference
- return reference::convolution<T>(src, output_data_type, conv, scale, border_mode, constant_border_value, _width, _height);
- }
-
- virtual TensorType compute_target(const TensorShape &shape, DataType output_data_type, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value) = 0;
-
- BorderMode _border_mode{};
- TensorType _target{};
- SimpleTensor<T> _reference{};
- unsigned int _width{};
- unsigned int _height{};
-};
-
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class ConvolutionSquareValidationFixture : public ConvolutionValidationFixture<TensorType, AccessorType, FunctionType, T>
-{
-public:
- template <typename...>
- void setup(TensorShape shape, DataType output_data_type, BorderMode border_mode, const unsigned int width)
- {
- ConvolutionValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, output_data_type, border_mode, width, width);
- }
-
-protected:
- TensorType compute_target(const TensorShape &shape, DataType output_data_type, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value)
- {
- // Create tensors
- TensorType src = create_tensor<TensorType>(shape, DataType::U8);
- TensorType dst = create_tensor<TensorType>(shape, output_data_type);
-
- // Create and configure function
- FunctionType convolution;
- convolution.configure(&src, &dst, conv, scale, border_mode, constant_border_value);
-
- ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Allocate tensors
- src.allocator()->allocate();
- dst.allocator()->allocate();
-
- ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Fill tensors
- this->fill(AccessorType(src), 0);
- this->fill(AccessorType(dst), 1);
-
- // Compute function
- convolution.run();
-
- return dst;
- }
-};
-
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class ConvolutionSeparableValidationFixture : public ConvolutionValidationFixture<TensorType, AccessorType, FunctionType, T>
-{
-public:
- template <typename...>
- void setup(TensorShape shape, DataType output_data_type, BorderMode border_mode, const unsigned int width)
- {
- ConvolutionValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, output_data_type, border_mode, width, width, true);
- }
-
-protected:
- TensorType compute_target(const TensorShape &shape, DataType output_data_type, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value)
- {
- // Create tensors
- TensorType src = create_tensor<TensorType>(shape, DataType::U8);
- TensorType dst = create_tensor<TensorType>(shape, output_data_type);
-
- // Create and configure function
- FunctionType convolution;
- convolution.configure(&src, &dst, conv, scale, border_mode, constant_border_value);
-
- ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Allocate tensors
- src.allocator()->allocate();
- dst.allocator()->allocate();
-
- ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Fill tensors
- this->fill(AccessorType(src), 0);
- this->fill(AccessorType(dst), 1);
-
- // Compute function
- convolution.run();
-
- return dst;
- }
-};
-
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class ConvolutionRectangleValidationFixture : public ConvolutionValidationFixture<TensorType, AccessorType, FunctionType, T>
-{
-public:
- template <typename...>
- void setup(TensorShape shape, DataType output_data_type, BorderMode border_mode, const unsigned int width, const unsigned int height)
- {
- ConvolutionValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, output_data_type, border_mode, width, height);
- }
-
-protected:
- TensorType compute_target(const TensorShape &shape, DataType output_data_type, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value)
- {
- // Create tensors
- TensorType src = create_tensor<TensorType>(shape, DataType::U8);
- TensorType dst = create_tensor<TensorType>(shape, output_data_type);
-
- // Create and configure function
- FunctionType convolution;
- convolution.configure(&src, &dst, conv, this->_width, this->_height, scale, border_mode, constant_border_value);
-
- ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Allocate tensors
- src.allocator()->allocate();
- dst.allocator()->allocate();
-
- ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Fill tensors
- this->fill(AccessorType(src), 0);
- this->fill(AccessorType(dst), 1);
-
- // Compute function
- convolution.run();
-
- return dst;
- }
-};
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_CONVOLUTION_FIXTURE */
diff --git a/tests/validation/fixtures/DerivativeFixture.h b/tests/validation/fixtures/DerivativeFixture.h
deleted file mode 100644
index e520a9e14e..0000000000
--- a/tests/validation/fixtures/DerivativeFixture.h
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * Copyright (c) 2017 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_TEST_DERIVATIVE_FIXTURE
-#define ARM_COMPUTE_TEST_DERIVATIVE_FIXTURE
-
-#include "tests/Globals.h"
-#include "tests/IAccessor.h"
-#include "tests/Types.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Fixture.h"
-#include "tests/validation/reference/Derivative.h"
-
-#include <memory>
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T, typename U>
-class DerivativeValidationFixture : public framework::Fixture
-{
-public:
- template <typename...>
- void setup(TensorShape shape, BorderMode border_mode, Format format, GradientDimension gradient_dimension)
- {
- // Generate a random constant value
- std::mt19937 gen(library->seed());
- std::uniform_int_distribution<uint8_t> int_dist(0, 255);
- const uint8_t constant_border_value = int_dist(gen);
-
- _border_mode = border_mode;
- _target = compute_target(shape, border_mode, format, constant_border_value, gradient_dimension);
- _reference = compute_reference(shape, border_mode, format, constant_border_value, gradient_dimension);
- }
-
-protected:
- template <typename V>
- void fill(V &&tensor)
- {
- library->fill_tensor_uniform(tensor, 0);
- }
-
- template <typename V>
- void fill_zero(V &&tensor)
- {
- library->fill_tensor_uniform(tensor, 0, static_cast<U>(0), static_cast<U>(0));
- }
-
- std::pair<TensorType, TensorType> compute_target(const TensorShape &shape, BorderMode border_mode, Format format, uint8_t constant_border_value, GradientDimension gradient_dimension)
- {
- // Create tensors
- TensorType src = create_tensor<TensorType>(shape, data_type_from_format(format));
- TensorType dst_x = create_tensor<TensorType>(shape, data_type_from_format(Format::S16));
- TensorType dst_y = create_tensor<TensorType>(shape, data_type_from_format(Format::S16));
-
- src.info()->set_format(format);
- dst_x.info()->set_format(Format::S16);
- dst_y.info()->set_format(Format::S16);
-
- FunctionType derivative;
-
- switch(gradient_dimension)
- {
- case GradientDimension::GRAD_X:
- derivative.configure(&src, &dst_x, nullptr, border_mode, constant_border_value);
- break;
- case GradientDimension::GRAD_Y:
- derivative.configure(&src, nullptr, &dst_y, border_mode, constant_border_value);
- break;
- case GradientDimension::GRAD_XY:
- derivative.configure(&src, &dst_x, &dst_y, border_mode, constant_border_value);
- break;
- default:
- ARM_COMPUTE_ERROR("Gradient dimension not supported");
- }
-
- ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(dst_x.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(dst_y.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Allocate tensors
- src.allocator()->allocate();
- dst_x.allocator()->allocate();
- dst_y.allocator()->allocate();
-
- ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!dst_x.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!dst_y.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Fill tensors
- fill(AccessorType(src));
- fill_zero(AccessorType(dst_x));
- fill_zero(AccessorType(dst_y));
-
- // Compute function
- derivative.run();
-
- return std::make_pair(std::move(dst_x), std::move(dst_y));
- }
-
- std::pair<SimpleTensor<U>, SimpleTensor<U>> compute_reference(const TensorShape &shape, BorderMode border_mode, Format format, uint8_t constant_border_value, GradientDimension gradient_dimension)
- {
- // Create reference
- SimpleTensor<T> src{ shape, format };
-
- // Fill reference
- fill(src);
-
- return reference::derivative<U>(src, border_mode, constant_border_value, gradient_dimension);
- }
-
- BorderMode _border_mode{ BorderMode::UNDEFINED };
- std::pair<TensorType, TensorType> _target{};
- std::pair<SimpleTensor<U>, SimpleTensor<U>> _reference{};
-};
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_DERIVATIVE_FIXTURE */
diff --git a/tests/validation/fixtures/DilateFixture.h b/tests/validation/fixtures/DilateFixture.h
deleted file mode 100644
index 51ed4df2d3..0000000000
--- a/tests/validation/fixtures/DilateFixture.h
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Copyright (c) 2017 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_TEST_DILATE_FIXTURE
-#define ARM_COMPUTE_TEST_DILATE_FIXTURE
-
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Types.h"
-#include "tests/AssetsLibrary.h"
-#include "tests/Globals.h"
-#include "tests/IAccessor.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Fixture.h"
-#include "tests/validation/reference/Dilate.h"
-
-#include <random>
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class DilateValidationFixture : public framework::Fixture
-{
-public:
- template <typename...>
- void setup(TensorShape shape, DataType data_type, BorderMode border_mode)
- {
- std::mt19937 gen(library->seed());
- std::uniform_int_distribution<uint8_t> distribution(0, 255);
- const uint8_t constant_border_value = distribution(gen);
-
- _border_mode = border_mode;
- _target = compute_target(shape, data_type, border_mode, constant_border_value);
- _reference = compute_reference(shape, data_type, border_mode, constant_border_value);
- }
-
-protected:
- template <typename U>
- void fill(U &&tensor)
- {
- library->fill_tensor_uniform(tensor, 0);
- }
-
- TensorType compute_target(const TensorShape &shape, DataType data_type, BorderMode border_mode, uint8_t constant_border_value)
- {
- // Create tensors
- TensorType src = create_tensor<TensorType>(shape, data_type);
- TensorType dst = create_tensor<TensorType>(shape, data_type);
-
- // Create and configure function
- FunctionType dilate;
- dilate.configure(&src, &dst, border_mode, constant_border_value);
-
- ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Allocate tensors
- src.allocator()->allocate();
- dst.allocator()->allocate();
-
- ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Fill tensors
- fill(AccessorType(src));
-
- // Compute function
- dilate.run();
-
- return dst;
- }
-
- SimpleTensor<T> compute_reference(const TensorShape &shape, DataType data_type, BorderMode border_mode, uint8_t constant_border_value)
- {
- ARM_COMPUTE_ERROR_ON(data_type != DataType::U8);
-
- // Create reference
- SimpleTensor<T> src{ shape, data_type };
-
- // Fill reference
- fill(src);
-
- // Compute reference
- return reference::dilate<T>(src, border_mode, constant_border_value);
- }
-
- BorderMode _border_mode{};
- TensorType _target{};
- SimpleTensor<T> _reference{};
-};
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_DILATE_FIXTURE */
diff --git a/tests/validation/fixtures/EqualizeHistogramFixture.h b/tests/validation/fixtures/EqualizeHistogramFixture.h
deleted file mode 100644
index f7a0312747..0000000000
--- a/tests/validation/fixtures/EqualizeHistogramFixture.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (c) 2017-2018 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_TEST_EQUALIZE_HISTOGRAM_FIXTURE
-#define ARM_COMPUTE_TEST_EQUALIZE_HISTOGRAM_FIXTURE
-
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Types.h"
-#include "tests/AssetsLibrary.h"
-#include "tests/Globals.h"
-#include "tests/IAccessor.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Fixture.h"
-#include "tests/validation/reference/EqualizeHistogram.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class EqualizeHistogramValidationFixture : public framework::Fixture
-{
-public:
- template <typename...>
- void setup(TensorShape shape, DataType data_type)
- {
- _target = compute_target(shape, data_type);
- _reference = compute_reference(shape, data_type);
- }
-
-protected:
- template <typename U>
- void fill(U &&tensor)
- {
- library->fill_tensor_uniform(tensor, 0);
- }
-
- TensorType compute_target(const TensorShape &shape, DataType data_type)
- {
- // Create tensors
- TensorType src = create_tensor<TensorType>(shape, data_type);
- TensorType dst = create_tensor<TensorType>(shape, data_type);
-
- // Create and configure function
- FunctionType equalize_histogram;
-
- equalize_histogram.configure(&src, &dst);
-
- ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Allocate tensors
- src.allocator()->allocate();
- dst.allocator()->allocate();
- ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Fill tensors
- fill(AccessorType(src));
-
- // Compute function
- equalize_histogram.run();
-
- return dst;
- }
-
- SimpleTensor<T> compute_reference(const TensorShape &shape, DataType data_type)
- {
- // Create reference
- SimpleTensor<T> src{ shape, data_type };
-
- // Fill reference
- fill(src);
-
- return reference::equalize_histogram<T>(src);
- }
-
- TensorType _target{};
- SimpleTensor<T> _reference{};
-};
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_EQUALIZE_HISTOGRAM_FIXTURE */
diff --git a/tests/validation/fixtures/ErodeFixture.h b/tests/validation/fixtures/ErodeFixture.h
deleted file mode 100644
index b9f17a266a..0000000000
--- a/tests/validation/fixtures/ErodeFixture.h
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Copyright (c) 2017 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_TEST_ERODE_FIXTURE
-#define ARM_COMPUTE_TEST_ERODE_FIXTURE
-
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Types.h"
-#include "tests/AssetsLibrary.h"
-#include "tests/Globals.h"
-#include "tests/IAccessor.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Fixture.h"
-#include "tests/validation/reference/Erode.h"
-
-#include <random>
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class ErodeValidationFixture : public framework::Fixture
-{
-public:
- template <typename...>
- void setup(TensorShape shape, DataType data_type, BorderMode border_mode)
- {
- std::mt19937 gen(library->seed());
- std::uniform_int_distribution<uint8_t> distribution(0, 255);
- const uint8_t constant_border_value = distribution(gen);
-
- _border_mode = border_mode;
- _target = compute_target(shape, data_type, border_mode, constant_border_value);
- _reference = compute_reference(shape, data_type, border_mode, constant_border_value);
- }
-
-protected:
- template <typename U>
- void fill(U &&tensor)
- {
- library->fill_tensor_uniform(tensor, 0);
- }
-
- TensorType compute_target(const TensorShape &shape, DataType data_type, BorderMode border_mode, uint8_t constant_border_value)
- {
- // Create tensors
- TensorType src = create_tensor<TensorType>(shape, data_type);
- TensorType dst = create_tensor<TensorType>(shape, data_type);
-
- // Create and configure function
- FunctionType erode;
- erode.configure(&src, &dst, border_mode, constant_border_value);
-
- ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Allocate tensors
- src.allocator()->allocate();
- dst.allocator()->allocate();
-
- ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Fill tensors
- fill(AccessorType(src));
-
- // Compute function
- erode.run();
-
- return dst;
- }
-
- SimpleTensor<T> compute_reference(const TensorShape &shape, DataType data_type, BorderMode border_mode, uint8_t constant_border_value)
- {
- ARM_COMPUTE_ERROR_ON(data_type != DataType::U8);
-
- // Create reference
- SimpleTensor<T> src{ shape, data_type };
-
- // Fill reference
- fill(src);
-
- // Compute reference
- return reference::erode<T>(src, border_mode, constant_border_value);
- }
-
- BorderMode _border_mode{};
- TensorType _target{};
- SimpleTensor<T> _reference{};
-};
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_ERODE_FIXTURE */
diff --git a/tests/validation/fixtures/FastCornersFixture.h b/tests/validation/fixtures/FastCornersFixture.h
deleted file mode 100644
index ae66c375ef..0000000000
--- a/tests/validation/fixtures/FastCornersFixture.h
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Copyright (c) 2017-2018 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_TEST_FAST_CORNERS_FIXTURE
-#define ARM_COMPUTE_TEST_FAST_CORNERS_FIXTURE
-
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Types.h"
-#include "tests/AssetsLibrary.h"
-#include "tests/Globals.h"
-#include "tests/IAccessor.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Fixture.h"
-#include "tests/validation/reference/FastCorners.h"
-
-#include <random>
-
-namespace arm_compute
-{
-class CLFastCorners;
-class NEFastCorners;
-
-namespace test
-{
-namespace validation
-{
-template <typename TensorType, typename AccessorType, typename ArrayType, typename FunctionType, typename T>
-class FastCornersValidationFixture : public framework::Fixture
-{
-public:
- template <typename...>
- void setup(std::string image, Format format, bool suppress_nonmax, BorderMode border_mode)
- {
- std::mt19937 gen(library->seed());
- std::uniform_int_distribution<uint8_t> int_dist(0, 255);
- std::uniform_real_distribution<float> real_dist(0, 255);
-
- const uint8_t constant_border_value = int_dist(gen);
- const float threshold = real_dist(gen);
-
- _target = compute_target(image, format, threshold, suppress_nonmax, border_mode, constant_border_value);
- _reference = compute_reference(image, format, threshold, suppress_nonmax, border_mode, constant_border_value);
- }
-
-protected:
- template <typename U>
- void fill(U &&tensor, RawTensor raw)
- {
- library->fill(tensor, raw);
- }
-
- template <typename F, typename std::enable_if<std::is_same<F, CLFastCorners>::value, int>::type = 0>
- void configure_target(F &func, TensorType &src, ArrayType &corners, unsigned int *num_corners, float threshold, bool suppress_nonmax, BorderMode border_mode, uint8_t constant_border_value)
- {
- func.configure(&src, threshold, suppress_nonmax, &corners, num_corners, border_mode, constant_border_value);
- }
-
- template <typename F, typename std::enable_if<std::is_same<F, NEFastCorners>::value, int>::type = 0>
- void configure_target(F &func, TensorType &src, ArrayType &corners, unsigned int *num_corners, float threshold, bool suppress_nonmax, BorderMode border_mode, uint8_t constant_border_value)
- {
- ARM_COMPUTE_UNUSED(num_corners);
- func.configure(&src, threshold, suppress_nonmax, &corners, border_mode, constant_border_value);
- }
-
- ArrayType compute_target(const std::string &image, Format format, float threshold, bool suppress_nonmax, BorderMode border_mode, uint8_t constant_border_value)
- {
- // Load the image (cached by the library if loaded before)
- const RawTensor &raw = library->get(image, format);
-
- // Create tensors
- TensorType src = create_tensor<TensorType>(raw.shape(), format);
-
- // Create array of keypoints
- ArrayType corners(raw.shape().total_size());
- unsigned int num_corners = raw.shape().total_size();
-
- // Create and configure function
- FunctionType fast_corners;
- configure_target<FunctionType>(fast_corners, src, corners, &num_corners, threshold, suppress_nonmax, border_mode, constant_border_value);
-
- ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Allocate tensors
- src.allocator()->allocate();
-
- ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Fill tensors
- fill(AccessorType(src), raw);
-
- // Compute function
- fast_corners.run();
-
- return corners;
- }
-
- std::vector<KeyPoint> compute_reference(const std::string &image, Format format, float threshold, bool suppress_nonmax, BorderMode border_mode, uint8_t constant_border_value)
- {
- // Load the image (cached by the library if loaded before)
- const RawTensor &raw = library->get(image, format);
-
- // Create reference
- SimpleTensor<T> src{ raw.shape(), format };
-
- // Fill reference
- fill(src, raw);
-
- // Compute reference
- return reference::fast_corners<T>(src, threshold, suppress_nonmax, border_mode, constant_border_value);
- }
-
- ArrayType _target{};
- std::vector<KeyPoint> _reference{};
-};
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_FAST_CORNERS_FIXTURE */
diff --git a/tests/validation/fixtures/Gaussian3x3Fixture.h b/tests/validation/fixtures/Gaussian3x3Fixture.h
deleted file mode 100644
index 4a154ea0d1..0000000000
--- a/tests/validation/fixtures/Gaussian3x3Fixture.h
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Copyright (c) 2017 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_TEST_GAUSSIAN3X3_FIXTURE
-#define ARM_COMPUTE_TEST_GAUSSIAN3X3_FIXTURE
-
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Types.h"
-#include "tests/AssetsLibrary.h"
-#include "tests/Globals.h"
-#include "tests/IAccessor.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Fixture.h"
-#include "tests/validation/reference/Gaussian3x3.h"
-
-#include <random>
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class Gaussian3x3ValidationFixture : public framework::Fixture
-{
-public:
- template <typename...>
- void setup(TensorShape shape, DataType data_type, BorderMode border_mode)
- {
- std::mt19937 gen(library->seed());
- std::uniform_int_distribution<uint8_t> distribution(0, 255);
- const uint8_t constant_border_value = distribution(gen);
-
- _border_mode = border_mode;
- _target = compute_target(shape, data_type, border_mode, constant_border_value);
- _reference = compute_reference(shape, data_type, border_mode, constant_border_value);
- }
-
-protected:
- template <typename U>
- void fill(U &&tensor)
- {
- library->fill_tensor_uniform(tensor, 0);
- }
-
- TensorType compute_target(const TensorShape &shape, DataType data_type, BorderMode border_mode, uint8_t constant_border_value)
- {
- // Create tensors
- TensorType src = create_tensor<TensorType>(shape, data_type);
- TensorType dst = create_tensor<TensorType>(shape, data_type);
-
- // Create and configure function
- FunctionType gaussian3x3;
- gaussian3x3.configure(&src, &dst, border_mode, constant_border_value);
-
- ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Allocate tensors
- src.allocator()->allocate();
- dst.allocator()->allocate();
-
- ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Fill tensors
- fill(AccessorType(src));
-
- // Compute function
- gaussian3x3.run();
-
- return dst;
- }
-
- SimpleTensor<T> compute_reference(const TensorShape &shape, DataType data_type, BorderMode border_mode, uint8_t constant_border_value)
- {
- ARM_COMPUTE_ERROR_ON(data_type != DataType::U8);
-
- // Create reference
- SimpleTensor<T> src{ shape, data_type };
-
- // Fill reference
- fill(src);
-
- // Compute reference
- return reference::gaussian3x3<T>(src, border_mode, constant_border_value);
- }
-
- BorderMode _border_mode{};
- TensorType _target{};
- SimpleTensor<T> _reference{};
-};
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_GAUSSIAN3X3_FIXTURE */
diff --git a/tests/validation/fixtures/Gaussian5x5Fixture.h b/tests/validation/fixtures/Gaussian5x5Fixture.h
deleted file mode 100644
index 68f91e1656..0000000000
--- a/tests/validation/fixtures/Gaussian5x5Fixture.h
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Copyright (c) 2017 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_TEST_GAUSSIAN5X5_FIXTURE
-#define ARM_COMPUTE_TEST_GAUSSIAN5X5_FIXTURE
-
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Types.h"
-#include "tests/AssetsLibrary.h"
-#include "tests/Globals.h"
-#include "tests/IAccessor.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Fixture.h"
-#include "tests/validation/reference/Gaussian5x5.h"
-
-#include <random>
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class Gaussian5x5ValidationFixture : public framework::Fixture
-{
-public:
- template <typename...>
- void setup(TensorShape shape, DataType data_type, BorderMode border_mode)
- {
- std::mt19937 gen(library->seed());
- std::uniform_int_distribution<uint8_t> distribution(0, 255);
- const uint8_t constant_border_value = distribution(gen);
-
- _border_mode = border_mode;
- _target = compute_target(shape, data_type, border_mode, constant_border_value);
- _reference = compute_reference(shape, data_type, border_mode, constant_border_value);
- }
-
-protected:
- template <typename U>
- void fill(U &&tensor)
- {
- library->fill_tensor_uniform(tensor, 0);
- }
-
- TensorType compute_target(const TensorShape &shape, DataType data_type, BorderMode border_mode, uint8_t constant_border_value)
- {
- // Create tensors
- TensorType src = create_tensor<TensorType>(shape, data_type);
- TensorType dst = create_tensor<TensorType>(shape, data_type);
-
- // Create and configure function
- FunctionType gaussian5x5;
- gaussian5x5.configure(&src, &dst, border_mode, constant_border_value);
-
- ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Allocate tensors
- src.allocator()->allocate();
- dst.allocator()->allocate();
-
- ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Fill tensors
- fill(AccessorType(src));
-
- // Compute function
- gaussian5x5.run();
-
- return dst;
- }
-
- SimpleTensor<T> compute_reference(const TensorShape &shape, DataType data_type, BorderMode border_mode, uint8_t constant_border_value)
- {
- ARM_COMPUTE_ERROR_ON(data_type != DataType::U8);
-
- // Create reference
- SimpleTensor<T> src{ shape, data_type };
-
- // Fill reference
- fill(src);
-
- // Compute reference
- return reference::gaussian5x5<T>(src, border_mode, constant_border_value);
- }
-
- BorderMode _border_mode{};
- TensorType _target{};
- SimpleTensor<T> _reference{};
-};
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_GAUSSIAN5X5_FIXTURE */
diff --git a/tests/validation/fixtures/GaussianPyramidHalfFixture.h b/tests/validation/fixtures/GaussianPyramidHalfFixture.h
deleted file mode 100644
index f91b1d520d..0000000000
--- a/tests/validation/fixtures/GaussianPyramidHalfFixture.h
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Copyright (c) 2017 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_TEST_GAUSSIAN_PYRAMID_HALF_FIXTURE
-#define ARM_COMPUTE_TEST_GAUSSIAN_PYRAMID_HALF_FIXTURE
-
-#include "arm_compute/core/IPyramid.h"
-#include "arm_compute/core/PyramidInfo.h"
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Types.h"
-#include "tests/AssetsLibrary.h"
-#include "tests/Globals.h"
-#include "tests/IAccessor.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Fixture.h"
-#include "tests/validation/reference/GaussianPyramidHalf.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T, typename PyramidType>
-class GaussianPyramidHalfValidationFixture : public framework::Fixture
-{
-public:
- template <typename...>
- void setup(TensorShape shape, BorderMode border_mode, size_t num_levels)
- {
- std::mt19937 gen(library->seed());
- std::uniform_int_distribution<uint8_t> distribution(0, 255);
- const uint8_t constant_border_value = distribution(gen);
-
- _border_mode = border_mode;
-
- // Compute target and reference
- compute_target(shape, border_mode, constant_border_value, num_levels);
- compute_reference(shape, border_mode, constant_border_value, num_levels);
- }
-
-protected:
- template <typename U>
- void fill(U &&tensor)
- {
- library->fill_tensor_uniform(tensor, 0);
- }
-
- void compute_target(const TensorShape &shape, BorderMode border_mode, uint8_t constant_border_value, size_t num_levels)
- {
- // Create tensors
- TensorType src = create_tensor<TensorType>(shape, DataType::U8);
-
- PyramidInfo pyramid_info(num_levels, SCALE_PYRAMID_HALF, shape, Format::U8);
- _target.init(pyramid_info);
-
- // Create and configure function
- FunctionType gaussian_pyramid;
-
- gaussian_pyramid.configure(&src, &_target, border_mode, constant_border_value);
-
- ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
- for(size_t i = 0; i < pyramid_info.num_levels(); ++i)
- {
- ARM_COMPUTE_EXPECT(_target.get_pyramid_level(i)->info()->is_resizable(), framework::LogLevel::ERRORS);
- }
-
- // Allocate input tensor
- src.allocator()->allocate();
-
- // Allocate pyramid
- _target.allocate();
-
- ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
- for(size_t i = 0; i < pyramid_info.num_levels(); ++i)
- {
- ARM_COMPUTE_EXPECT(!_target.get_pyramid_level(i)->info()->is_resizable(), framework::LogLevel::ERRORS);
- }
-
- // Fill tensors
- fill(AccessorType(src));
-
- // Compute function
- gaussian_pyramid.run();
- }
-
- void compute_reference(const TensorShape &shape, BorderMode border_mode, uint8_t constant_border_value, size_t num_levels)
- {
- // Create reference
- SimpleTensor<T> src{ shape, DataType::U8 };
-
- // Fill reference
- fill(src);
-
- _reference = reference::gaussian_pyramid_half<T>(src, border_mode, constant_border_value, num_levels);
- }
-
- PyramidType _target{};
- std::vector<SimpleTensor<T>> _reference{};
- BorderMode _border_mode{};
-};
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_GAUSSIAN_PYRAMID_HALF_FIXTURE */ \ No newline at end of file
diff --git a/tests/validation/fixtures/HOGDescriptorFixture.h b/tests/validation/fixtures/HOGDescriptorFixture.h
deleted file mode 100644
index 1021e12d72..0000000000
--- a/tests/validation/fixtures/HOGDescriptorFixture.h
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * Copyright (c) 2017-2018 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_TEST_HOG_DESCRIPTOR_FIXTURE
-#define ARM_COMPUTE_TEST_HOG_DESCRIPTOR_FIXTURE
-
-#include "arm_compute/core/HOGInfo.h"
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Types.h"
-#include "tests/AssetsLibrary.h"
-#include "tests/Globals.h"
-#include "tests/IAccessor.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Fixture.h"
-#include "tests/validation/reference/HOGDescriptor.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-template <typename TensorType, typename HOGType, typename AccessorType, typename FunctionType, typename T, typename U>
-class HOGDescriptorValidationFixture : public framework::Fixture
-{
-public:
- template <typename...>
- void setup(std::string image, HOGInfo hog_info, Format format, BorderMode border_mode)
- {
- // Only defined borders supported
- ARM_COMPUTE_ERROR_ON(border_mode == BorderMode::UNDEFINED);
-
- // Generate a random constant value
- std::mt19937 gen(library->seed());
- std::uniform_int_distribution<T> int_dist(0, 255);
- const T constant_border_value = int_dist(gen);
-
- _target = compute_target(image, format, border_mode, constant_border_value, hog_info);
- _reference = compute_reference(image, format, border_mode, constant_border_value, hog_info);
- }
-
-protected:
- template <typename V>
- void fill(V &&tensor, const std::string image, Format format)
- {
- library->fill(tensor, image, format);
- }
-
- template <typename V, typename D>
- void fill(V &&tensor, int i, D max)
- {
- library->fill_tensor_uniform(tensor, i, static_cast<D>(0), max);
- }
-
- TensorType compute_target(const std::string image, Format &format, BorderMode &border_mode, T constant_border_value, const HOGInfo &hog_info)
- {
- // Get image shape for src tensor
- TensorShape shape = library->get_image_shape(image);
-
- // Create tensor info for HOG descriptor
- TensorInfo tensor_info_hog_descriptor(hog_info, shape.x(), shape.y());
-
- // Create HOG
- HOGType hog = create_HOG<HOGType>(hog_info);
-
- // Create tensors
- TensorType src = create_tensor<TensorType>(shape, data_type_from_format(format));
- TensorType dst = create_tensor<TensorType>(tensor_info_hog_descriptor.tensor_shape(), DataType::F32, tensor_info_hog_descriptor.num_channels());
-
- ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Create and configure function
- FunctionType hog_descriptor;
- hog_descriptor.configure(&src, &dst, &hog, border_mode, constant_border_value);
-
- ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Allocate tensors
- src.allocator()->allocate();
- dst.allocator()->allocate();
- ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- const T max = std::numeric_limits<T>::max();
-
- // Fill tensors
- fill(AccessorType(src), image, format);
- fill(AccessorType(dst), 1, static_cast<U>(max));
-
- // Compute function
- hog_descriptor.run();
-
- return dst;
- }
-
- SimpleTensor<U> compute_reference(const std::string image, Format format, BorderMode border_mode, T constant_border_value, const HOGInfo &hog_info)
- {
- // Create reference
- SimpleTensor<T> src{ library->get_image_shape(image), data_type_from_format(format) };
-
- // Fill reference
- fill(src, image, format);
-
- return reference::hog_descriptor<U>(src, border_mode, constant_border_value, hog_info);
- }
-
- TensorType _target{};
- SimpleTensor<U> _reference{};
-};
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_HOG_DESCRIPTOR_FIXTURE */
diff --git a/tests/validation/fixtures/HOGDetectorFixture.h b/tests/validation/fixtures/HOGDetectorFixture.h
deleted file mode 100644
index f12e65b75c..0000000000
--- a/tests/validation/fixtures/HOGDetectorFixture.h
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Copyright (c) 2018 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_TEST_HOG_DETECTOR_FIXTURE
-#define ARM_COMPUTE_TEST_HOG_DETECTOR_FIXTURE
-
-#include "arm_compute/core/HOGInfo.h"
-#include "arm_compute/core/Types.h"
-#include "tests/AssetsLibrary.h"
-#include "tests/Globals.h"
-#include "tests/IAccessor.h"
-#include "tests/IHOGAccessor.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Fixture.h"
-#include "tests/validation/fixtures/HOGDescriptorFixture.h"
-#include "tests/validation/reference/HOGDetector.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-template <typename TensorType,
- typename HOGType,
- typename DetectionWindowArrayType,
- typename HOGDescriptorType,
- typename AccessorType,
- typename ArrayAccessorType,
- typename HOGAccessorType,
- typename HOGDetectorType,
- typename T,
- typename U>
-class HOGDetectorValidationFixture : public HOGDescriptorValidationFixture<TensorType, HOGType, AccessorType, HOGDescriptorType, T, U>
-{
-public:
- template <typename...>
- void setup(Size2D detection_window_stride, std::string image, HOGInfo hog_info, Format format, BorderMode border_mode)
- {
- using HDF = HOGDescriptorValidationFixture<TensorType, HOGType, AccessorType, HOGDescriptorType, T, U>;
- HDF::setup(image, hog_info, format, border_mode);
-
- const unsigned int max_num_detection_windows = 100000;
-
- // Initialise descriptor (linear SVM coefficients).
- // NOTE: Fixed values are used to keep the number of detection windows detected
- // consistent in order to have meaningful validation tolerances.
- // The values are "unbalanced" to reduce the number of detected objects
- std::random_device::result_type seed = 0;
- std::vector<U> descriptor = generate_random_real(hog_info.descriptor_size(), -0.505f, 0.495f, seed);
-
- // Compute target and reference values using feature vector from descriptor kernel
- _target = compute_target(HDF::_target, descriptor, max_num_detection_windows, hog_info, detection_window_stride);
- _reference = compute_reference(HDF::_reference, descriptor, max_num_detection_windows, hog_info, detection_window_stride);
- }
-
-protected:
- std::vector<DetectionWindow> compute_target(const TensorType &src, const std::vector<U> &descriptor, unsigned int max_num_detection_windows,
- const HOGInfo &hog_info, const Size2D &detection_window_stride)
- {
- // Create HOG
- HOGType hog = create_HOG<HOGType>(hog_info);
-
- // Create array of detection windows
- DetectionWindowArrayType detection_windows(max_num_detection_windows);
-
- // Copy HOG descriptor values to HOG memory
- {
- HOGAccessorType hog_accessor(hog);
- std::memcpy(hog_accessor.descriptor(), descriptor.data(), descriptor.size() * sizeof(U));
- }
-
- // Create and configure function
- HOGDetectorType hog_detector;
- hog_detector.configure(&src, &hog, &detection_windows, detection_window_stride);
-
- // Reset detection windows
- detection_windows.clear();
-
- // Compute function
- hog_detector.run();
-
- // Create array of detection windows
- std::vector<DetectionWindow> windows;
-
- // Copy detection windows
- ArrayAccessorType accessor(detection_windows);
-
- for(size_t i = 0; i < accessor.num_values(); i++)
- {
- DetectionWindow win;
- win.x = accessor.at(i).x;
- win.y = accessor.at(i).y;
- win.width = accessor.at(i).width;
- win.height = accessor.at(i).height;
- win.idx_class = accessor.at(i).idx_class;
- win.score = accessor.at(i).score;
-
- windows.push_back(win);
- }
-
- return windows;
- }
-
- std::vector<DetectionWindow> compute_reference(const SimpleTensor<U> &src, const std::vector<U> &descriptor, unsigned int max_num_detection_windows,
- const HOGInfo &hog_info, const Size2D &detection_window_stride)
- {
- // Assumes defaults value of zero for threshold and class_idx.
- return reference::hog_detector(src, descriptor, max_num_detection_windows, hog_info, detection_window_stride);
- }
-
- std::vector<DetectionWindow> _target{};
- std::vector<DetectionWindow> _reference{};
-};
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_HOG_DETECTOR_FIXTURE */
diff --git a/tests/validation/fixtures/HOGMultiDetectionFixture.h b/tests/validation/fixtures/HOGMultiDetectionFixture.h
deleted file mode 100644
index c37bdb6df6..0000000000
--- a/tests/validation/fixtures/HOGMultiDetectionFixture.h
+++ /dev/null
@@ -1,193 +0,0 @@
-/*
- * Copyright (c) 2018 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_TEST_HOG_MULTI_DETECTION_FIXTURE
-#define ARM_COMPUTE_TEST_HOG_MULTI_DETECTION_FIXTURE
-
-#include "arm_compute/core/HOGInfo.h"
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Types.h"
-#include "tests/AssetsLibrary.h"
-#include "tests/Globals.h"
-#include "tests/IAccessor.h"
-#include "tests/IHOGAccessor.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Fixture.h"
-#include "tests/validation/reference/HOGMultiDetection.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-template <typename TensorType,
- typename HOGType,
- typename MultiHOGType,
- typename DetectionWindowArrayType,
- typename DetectionWindowStrideType,
- typename AccessorType,
- typename Size2DArrayAccessorType,
- typename DetectionWindowArrayAccessorType,
- typename HOGAccessorType,
- typename FunctionType,
- typename T,
- typename U>
-class HOGMultiDetectionValidationFixture : public framework::Fixture
-{
-public:
- template <typename...>
- void setup(std::string image, std::vector<HOGInfo> models, Format format, BorderMode border_mode, bool non_maxima_suppression)
- {
- // Only defined borders supported
- ARM_COMPUTE_ERROR_ON(border_mode == BorderMode::UNDEFINED);
-
- // Generate a random constant value
- std::mt19937 gen(library->seed());
- std::uniform_int_distribution<T> int_dist(0, 255);
- const T constant_border_value = int_dist(gen);
-
- // Initialize descriptors vector
- std::vector<std::vector<U>> descriptors(models.size());
-
- // Use default values for threshold and min_distance
- const float threshold = 0.f;
- const float min_distance = 1.f;
-
- // Maximum number of detection windows per batch
- const unsigned int max_num_detection_windows = 100000;
-
- _target = compute_target(image, format, border_mode, constant_border_value, models, descriptors, max_num_detection_windows, threshold, non_maxima_suppression, min_distance);
- _reference = compute_reference(image, format, border_mode, constant_border_value, models, descriptors, max_num_detection_windows, threshold, non_maxima_suppression, min_distance);
- }
-
-protected:
- template <typename V>
- void fill(V &&tensor, const std::string image, Format format)
- {
- library->fill(tensor, image, format);
- }
-
- void initialize_batch(const std::vector<HOGInfo> &models, MultiHOGType &multi_hog,
- std::vector<std::vector<U>> &descriptors, DetectionWindowStrideType &detection_window_strides)
- {
- for(unsigned i = 0; i < models.size(); ++i)
- {
- auto hog_model = reinterpret_cast<HOGType *>(multi_hog.model(i));
- hog_model->init(models[i]);
-
- // Initialise descriptor (linear SVM coefficients).
- std::random_device::result_type seed = 0;
- descriptors.at(i) = generate_random_real(models[i].descriptor_size(), -0.505f, 0.495f, seed);
-
- // Copy HOG descriptor values to HOG memory
- {
- HOGAccessorType hog_accessor(*hog_model);
- std::memcpy(hog_accessor.descriptor(), descriptors.at(i).data(), descriptors.at(i).size() * sizeof(U));
- }
-
- // Initialize detection window stride
- Size2DArrayAccessorType accessor(detection_window_strides);
- accessor.at(i) = models[i].block_stride();
- }
- }
-
- std::vector<DetectionWindow> compute_target(const std::string image, Format &format, BorderMode &border_mode, T constant_border_value,
- const std::vector<HOGInfo> &models, std::vector<std::vector<U>> &descriptors, unsigned int max_num_detection_windows,
- float threshold, bool non_max_suppression, float min_distance)
- {
- MultiHOGType multi_hog(models.size());
- DetectionWindowArrayType detection_windows(max_num_detection_windows);
- DetectionWindowStrideType detection_window_strides(models.size());
-
- // Resize detection window_strides for index access
- detection_window_strides.resize(models.size());
-
- // Initialiize MultiHOG and detection windows
- initialize_batch(models, multi_hog, descriptors, detection_window_strides);
-
- // Get image shape for src tensor
- TensorShape shape = library->get_image_shape(image);
-
- // Create tensors
- TensorType src = create_tensor<TensorType>(shape, data_type_from_format(format));
- ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Create and configure function
- FunctionType hog_multi_detection;
- hog_multi_detection.configure(&src, &multi_hog, &detection_windows, &detection_window_strides, border_mode, constant_border_value, threshold, non_max_suppression, min_distance);
-
- // Reset detection windows
- detection_windows.clear();
-
- // Allocate tensors
- src.allocator()->allocate();
- ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Fill tensors
- fill(AccessorType(src), image, format);
-
- // Compute function
- hog_multi_detection.run();
-
- // Copy detection windows
- std::vector<DetectionWindow> windows;
- DetectionWindowArrayAccessorType accessor(detection_windows);
-
- for(size_t i = 0; i < accessor.num_values(); i++)
- {
- DetectionWindow win;
- win.x = accessor.at(i).x;
- win.y = accessor.at(i).y;
- win.width = accessor.at(i).width;
- win.height = accessor.at(i).height;
- win.idx_class = accessor.at(i).idx_class;
- win.score = accessor.at(i).score;
-
- windows.push_back(win);
- }
-
- return windows;
- }
-
- std::vector<DetectionWindow> compute_reference(const std::string image, Format format, BorderMode border_mode, T constant_border_value,
- const std::vector<HOGInfo> &models, const std::vector<std::vector<U>> &descriptors, unsigned int max_num_detection_windows,
- float threshold, bool non_max_suppression, float min_distance)
- {
- // Create reference
- SimpleTensor<T> src{ library->get_image_shape(image), data_type_from_format(format) };
-
- // Fill reference
- fill(src, image, format);
-
- // NOTE: Detection window stride fixed to block stride
- return reference::hog_multi_detection(src, border_mode, constant_border_value, models, descriptors, max_num_detection_windows, threshold, non_max_suppression, min_distance);
- }
-
- std::vector<DetectionWindow> _target{};
- std::vector<DetectionWindow> _reference{};
-};
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_HOG_MULTI_DETECTION_FIXTURE */
diff --git a/tests/validation/fixtures/HarrisCornersFixture.h b/tests/validation/fixtures/HarrisCornersFixture.h
deleted file mode 100644
index dbe77dde9c..0000000000
--- a/tests/validation/fixtures/HarrisCornersFixture.h
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- * Copyright (c) 2017-2018 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_TEST_HARRIS_CORNERS_FIXTURE
-#define ARM_COMPUTE_TEST_HARRIS_CORNERS_FIXTURE
-
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Types.h"
-#include "tests/AssetsLibrary.h"
-#include "tests/Globals.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Fixture.h"
-#include "tests/validation/Helpers.h"
-#include "tests/validation/reference/HarrisCornerDetector.h"
-
-namespace arm_compute
-{
-class CLHarrisCorners;
-class NEHarrisCorners;
-
-namespace test
-{
-namespace validation
-{
-template <typename TensorType, typename AccessorType, typename ArrayType, typename FunctionType, typename T>
-class HarrisCornersValidationFixture : public framework::Fixture
-{
-public:
- template <typename...>
- void setup(std::string image, int gradient_size, int block_size, BorderMode border_mode, Format format)
- {
- HarrisCornersParameters params = harris_corners_parameters();
-
- _target = compute_target(image, gradient_size, block_size, border_mode, format, params);
- _reference = compute_reference(image, gradient_size, block_size, border_mode, format, params);
- }
-
-protected:
- template <typename U>
- void fill(U &&tensor, RawTensor raw)
- {
- library->fill(tensor, raw);
- }
-
- ArrayType compute_target(std::string image, int gradient_size, int block_size, BorderMode border_mode, Format format, const HarrisCornersParameters &params)
- {
- // Load the image (cached by the library if loaded before)
- const RawTensor &raw = library->get(image, format);
-
- // Create tensors
- TensorType src = create_tensor<TensorType>(raw.shape(), format);
-
- // Create array of keypoints
- ArrayType corners(raw.shape().total_size());
-
- // Create harris corners configure function
- FunctionType harris_corners;
- harris_corners.configure(&src, params.threshold, params.min_dist, params.sensitivity, gradient_size, block_size, &corners, border_mode, params.constant_border_value);
-
- ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Allocate tensors
- src.allocator()->allocate();
-
- ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Fill tensors
- fill(AccessorType(src), raw);
-
- // Compute function
- harris_corners.run();
-
- return corners;
- }
-
- std::vector<KeyPoint> compute_reference(std::string image, int gradient_size, int block_size, BorderMode border_mode, Format format, const HarrisCornersParameters &params)
- {
- // Load the image (cached by the library if loaded before)
- const RawTensor &raw = library->get(image, format);
- // Create reference
- SimpleTensor<T> src{ raw.shape(), format };
-
- // Fill reference
- fill(src, raw);
-
- return reference::harris_corner_detector<T>(src, params.threshold, params.min_dist, params.sensitivity, gradient_size, block_size, border_mode, params.constant_border_value);
- }
-
- ArrayType _target{};
- std::vector<KeyPoint> _reference{};
-};
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_HARRIS_CORNERS_FIXTURE */
diff --git a/tests/validation/fixtures/HistogramFixture.h b/tests/validation/fixtures/HistogramFixture.h
deleted file mode 100644
index dceb23b904..0000000000
--- a/tests/validation/fixtures/HistogramFixture.h
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Copyright (c) 2017 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_TEST_HISTOGRAM_FIXTURE
-#define ARM_COMPUTE_TEST_HISTOGRAM_FIXTURE
-
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Types.h"
-#include "tests/AssetsLibrary.h"
-#include "tests/Globals.h"
-#include "tests/IAccessor.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Fixture.h"
-#include "tests/validation/reference/Histogram.h"
-#include "utils/Utils.h"
-
-#include <random>
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T, typename DistributionType>
-class HistogramValidationFixture : public framework::Fixture
-{
-public:
- template <typename...>
- void setup(TensorShape shape, DataType data_type)
- {
- std::mt19937 gen(library->seed());
- std::uniform_int_distribution<size_t> distribution_size_t(1, 30);
- const size_t num_bins = distribution_size_t(gen);
- std::uniform_int_distribution<int32_t> distribution_int32_t(0, 125);
- const size_t offset = distribution_int32_t(gen);
- std::uniform_int_distribution<uint32_t> distribution_uint32_t(1, 255 - offset);
- const size_t range = distribution_uint32_t(gen);
-
- _target = compute_target(shape, data_type, num_bins, offset, range);
- _reference = compute_reference(shape, data_type, num_bins, offset, range);
- }
-
-protected:
- template <typename U>
- void fill(U &&tensor)
- {
- library->fill_tensor_uniform(tensor, 0);
- }
-
- TensorType compute_target(const TensorShape &shape, DataType data_type, size_t num_bins, int32_t offset, uint32_t range)
- {
- // Create tensors
- TensorType src = create_tensor<TensorType>(shape, data_type);
- TensorType dst = create_tensor<TensorType>(TensorShape(num_bins), DataType::U32);
- DistributionType distribution_dst(num_bins, offset, range);
-
- // Create and configure function
- FunctionType histogram;
- histogram.configure(&src, &distribution_dst);
- ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Allocate tensors
- src.allocator()->allocate();
- dst.allocator()->allocate();
-
- ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Fill tensors
- fill(AccessorType(src));
-
- // Compute function
- histogram.run();
-
- // Copy the distribution in a tensor
- arm_compute::utils::map(distribution_dst, true);
- AccessorType accessor_dst = AccessorType(dst);
- uint32_t *dst_data = static_cast<uint32_t *>(accessor_dst.data());
-
- ARM_COMPUTE_EXPECT(accessor_dst.size() <= dst.info()->total_size(), framework::LogLevel::ERRORS);
-
- std::copy_n(distribution_dst.buffer(), num_bins, dst_data);
- arm_compute::utils::unmap(distribution_dst);
- return dst;
- }
-
- SimpleTensor<uint32_t> compute_reference(const TensorShape &shape, DataType data_type, size_t num_bins, int32_t offset, uint32_t range)
- {
- ARM_COMPUTE_ERROR_ON(data_type != DataType::U8);
-
- // Create reference
- SimpleTensor<T> src{ shape, data_type };
-
- // Fill reference
- fill(src);
-
- // Compute reference
- return reference::histogram<T>(src, num_bins, offset, range);
- }
-
- TensorType _target{};
- SimpleTensor<uint32_t> _reference{};
-};
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_HISTOGRAM_FIXTURE */
diff --git a/tests/validation/fixtures/IntegralImageFixture.h b/tests/validation/fixtures/IntegralImageFixture.h
deleted file mode 100644
index abc99730a9..0000000000
--- a/tests/validation/fixtures/IntegralImageFixture.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (c) 2017 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_TEST_INTEGRAL_IMAGE_FIXTURE
-#define ARM_COMPUTE_TEST_INTEGRAL_IMAGE_FIXTURE
-
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Types.h"
-#include "tests/AssetsLibrary.h"
-#include "tests/Globals.h"
-#include "tests/IAccessor.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Fixture.h"
-#include "tests/validation/reference/IntegralImage.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class IntegralImageValidationFixture : public framework::Fixture
-{
-public:
- template <typename...>
- void setup(TensorShape shape, DataType data_type)
- {
- _target = compute_target(shape);
- _reference = compute_reference(shape, data_type);
- }
-
-protected:
- template <typename U>
- void fill(U &&tensor)
- {
- library->fill_tensor_uniform(tensor, 0);
- }
-
- TensorType compute_target(const TensorShape &shape)
- {
- // Create tensors
- TensorType src = create_tensor<TensorType>(shape, DataType::U8);
- TensorType dst = create_tensor<TensorType>(shape, DataType::U32);
-
- // Create and configure function
- FunctionType integral_image;
- integral_image.configure(&src, &dst);
-
- ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Allocate tensors
- src.allocator()->allocate();
- dst.allocator()->allocate();
-
- ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Fill tensors
- fill(AccessorType(src));
-
- // Compute function
- integral_image.run();
-
- return dst;
- }
-
- SimpleTensor<uint32_t> compute_reference(const TensorShape &shape, DataType data_type)
- {
- // Create reference
- SimpleTensor<T> src{ shape, data_type };
-
- // Fill reference
- fill(src);
-
- return reference::integral_image<T>(src);
- }
-
- TensorType _target{};
- SimpleTensor<uint32_t> _reference{};
-};
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_INTEGRAL_IMAGE_FIXTURE */
diff --git a/tests/validation/fixtures/LaplacianPyramidFixture.h b/tests/validation/fixtures/LaplacianPyramidFixture.h
deleted file mode 100644
index 71319965b2..0000000000
--- a/tests/validation/fixtures/LaplacianPyramidFixture.h
+++ /dev/null
@@ -1,154 +0,0 @@
-/*
- * Copyright (c) 2018 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_TEST_LAPLACIAN_PYRAMID_FIXTURE
-#define ARM_COMPUTE_TEST_LAPLACIAN_PYRAMID_FIXTURE
-
-#include "arm_compute/core/IPyramid.h"
-#include "arm_compute/core/PyramidInfo.h"
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Types.h"
-#include "tests/AssetsLibrary.h"
-#include "tests/Globals.h"
-#include "tests/IAccessor.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Fixture.h"
-#include "tests/validation/reference/LaplacianPyramid.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T, typename U, typename PyramidType>
-class LaplacianPyramidValidationFixture : public framework::Fixture
-{
-public:
- template <typename...>
- void setup(TensorShape input_shape, BorderMode border_mode, size_t num_levels, Format format_in, Format format_out)
- {
- std::mt19937 generator(library->seed());
- std::uniform_int_distribution<T> distribution_u8(0, 255);
- const T constant_border_value = distribution_u8(generator);
-
- _pyramid_levels = num_levels;
- _border_mode = border_mode;
-
- _target = compute_target(input_shape, border_mode, constant_border_value, format_in, format_out);
- _reference = compute_reference(input_shape, border_mode, constant_border_value, format_in, format_out);
- }
-
-protected:
- template <typename V>
- void fill(V &&tensor)
- {
- library->fill_tensor_uniform(tensor, 0);
- }
-
- PyramidType compute_target(const TensorShape &input_shape, BorderMode border_mode, T constant_border_value,
- Format format_in, Format format_out)
- {
- // Create pyramid
- PyramidType pyramid{};
-
- // Create Pyramid Info
- PyramidInfo pyramid_info(_pyramid_levels, SCALE_PYRAMID_HALF, input_shape, format_out);
-
- // Use conservative padding strategy to fit all subsequent kernels
- pyramid.init_auto_padding(pyramid_info);
-
- // Create tensors
- TensorType src = create_tensor<TensorType>(input_shape, format_in);
-
- // The first two dimensions of the output tensor must match the first
- // two dimensions of the tensor in the last level of the pyramid
- TensorShape dst_shape(input_shape);
- dst_shape.set(0, pyramid.get_pyramid_level(_pyramid_levels - 1)->info()->dimension(0));
- dst_shape.set(1, pyramid.get_pyramid_level(_pyramid_levels - 1)->info()->dimension(1));
-
- // The lowest resolution tensor necessary to reconstruct the input
- // tensor from the pyramid.
- _dst_target = create_tensor<TensorType>(dst_shape, format_out);
-
- // Create and configure function
- FunctionType laplacian_pyramid;
- laplacian_pyramid.configure(&src, &pyramid, &_dst_target, border_mode, constant_border_value);
-
- ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(_dst_target.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Allocate tensors
- src.allocator()->allocate();
- _dst_target.allocator()->allocate();
-
- ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!_dst_target.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- pyramid.allocate();
-
- for(size_t i = 0; i < pyramid_info.num_levels(); ++i)
- {
- ARM_COMPUTE_EXPECT(!pyramid.get_pyramid_level(i)->info()->is_resizable(), framework::LogLevel::ERRORS);
- }
-
- // Fill tensors
- fill(AccessorType(src));
-
- // Compute function
- laplacian_pyramid.run();
-
- return pyramid;
- }
-
- std::vector<SimpleTensor<U>> compute_reference(const TensorShape &shape, BorderMode border_mode, T constant_border_value,
- Format format_in, Format format_out)
- {
- // Create reference
- SimpleTensor<T> src{ shape, format_in };
-
- // The first two dimensions of the output tensor must match the first
- // two dimensions of the tensor in the last level of the pyramid
- TensorShape dst_shape(shape);
- dst_shape.set(0, static_cast<float>(shape[0] + 1) / static_cast<float>(std::pow(2, _pyramid_levels - 1)));
- dst_shape.set(1, static_cast<float>(shape[1] + 1) / static_cast<float>(std::pow(2, _pyramid_levels - 1)));
-
- _dst_reference = SimpleTensor<U>(dst_shape, format_out);
-
- // Fill reference
- fill(src);
-
- return reference::laplacian_pyramid<T, U>(src, _dst_reference, _pyramid_levels, border_mode, constant_border_value);
- }
-
- size_t _pyramid_levels{};
- BorderMode _border_mode{};
- SimpleTensor<U> _dst_reference{};
- TensorType _dst_target{};
- PyramidType _target{};
- std::vector<SimpleTensor<U>> _reference{};
-};
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_LAPLACIAN_PYRAMID_FIXTURE */
diff --git a/tests/validation/fixtures/LaplacianReconstructFixture.h b/tests/validation/fixtures/LaplacianReconstructFixture.h
deleted file mode 100644
index 35432ee2cb..0000000000
--- a/tests/validation/fixtures/LaplacianReconstructFixture.h
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Copyright (c) 2018 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_TEST_LAPLACIAN_RECONSTRUCT_FIXTURE
-#define ARM_COMPUTE_TEST_LAPLACIAN_RECONSTRUCT_FIXTURE
-
-#include "arm_compute/core/IPyramid.h"
-#include "arm_compute/core/PyramidInfo.h"
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Types.h"
-#include "tests/AssetsLibrary.h"
-#include "tests/Globals.h"
-#include "tests/IAccessor.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Fixture.h"
-#include "tests/validation/fixtures/LaplacianPyramidFixture.h"
-#include "tests/validation/reference/LaplacianPyramid.h"
-#include "tests/validation/reference/LaplacianReconstruct.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-template <typename TensorType, typename AccessorType, typename FunctionType, typename LaplacianPyramidType, typename T, typename U, typename PyramidType>
-class LaplacianReconstructValidationFixture : public LaplacianPyramidValidationFixture<TensorType, AccessorType, LaplacianPyramidType, U, T, PyramidType>
-{
-public:
- template <typename...>
- void setup(TensorShape input_shape, BorderMode border_mode, size_t num_levels, Format format_in, Format format_out)
- {
- std::mt19937 generator(library->seed());
- std::uniform_int_distribution<U> distribution_u8(0, 255);
- const U constant_border_value = distribution_u8(generator);
-
- using LPF = LaplacianPyramidValidationFixture<TensorType, AccessorType, LaplacianPyramidType, U, T, PyramidType>;
- LPF::setup(input_shape, border_mode, num_levels, format_out, format_in);
-
- // Compute target and reference values using the pyramid and lowest
- // resolution tensor output from Laplacian Pyramid kernel
- _target = compute_target(input_shape, LPF::_target, LPF::_dst_target, border_mode, constant_border_value);
- _reference = compute_reference(LPF::_reference, LPF::_dst_reference, border_mode, constant_border_value);
- }
-
-protected:
- template <typename V>
- void fill(V &&tensor)
- {
- library->fill_tensor_uniform(tensor, 0);
- }
-
- TensorType compute_target(const TensorShape &input_shape, PyramidType &pyramid, TensorType &low_res, BorderMode border_mode, U constant_border_value)
- {
- // Create tensors
- TensorType dst = create_tensor<TensorType>(input_shape, DataType::U8);
-
- // Create and configure function
- FunctionType laplacian_reconstruct;
- laplacian_reconstruct.configure(&pyramid, &low_res, &dst, border_mode, constant_border_value);
-
- // Allocate tensors
- dst.allocator()->allocate();
- ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Compute function
- laplacian_reconstruct.run();
-
- return dst;
- }
-
- SimpleTensor<U> compute_reference(const std::vector<SimpleTensor<T>> &pyramid,
- const SimpleTensor<T> &low_res, BorderMode border_mode, U constant_border_value)
- {
- return reference::laplacian_reconstruct<T, U>(pyramid, low_res, border_mode, constant_border_value);
- }
-
- TensorType _target{};
- SimpleTensor<U> _reference{};
-};
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_LAPLACIAN_RECONSTRUCT_FIXTURE */
diff --git a/tests/validation/fixtures/MagnitudeFixture.h b/tests/validation/fixtures/MagnitudeFixture.h
deleted file mode 100644
index 81f4970b41..0000000000
--- a/tests/validation/fixtures/MagnitudeFixture.h
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright (c) 2017-2018 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_TEST_MAGNITUDE_FIXTURE
-#define ARM_COMPUTE_TEST_MAGNITUDE_FIXTURE
-
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Types.h"
-#include "tests/Globals.h"
-#include "tests/IAccessor.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Fixture.h"
-#include "tests/validation/reference/Magnitude.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class MagnitudeValidationFixture : public framework::Fixture
-{
-public:
- template <typename...>
- void setup(TensorShape shape, Format format, MagnitudeType magnitude_type)
- {
- _target = compute_target(shape, format, magnitude_type);
- _reference = compute_reference(shape, format, magnitude_type);
- _magnitude_type = magnitude_type;
- }
-
-protected:
- template <typename U>
- void fill(U &&tensor, std::random_device::result_type seed_offset)
- {
- library->fill_tensor_uniform(tensor, seed_offset);
- }
-
- TensorType compute_target(const TensorShape &shape, Format format, MagnitudeType magnitude_type)
- {
- DataType data_type = data_type_from_format(format);
-
- // Create tensors
- TensorType src1 = create_tensor<TensorType>(shape, data_type);
- src1.info()->set_format(format);
-
- TensorType src2 = create_tensor<TensorType>(shape, data_type);
- src2.info()->set_format(format);
-
- TensorType dst = create_tensor<TensorType>(shape, data_type);
- dst.info()->set_format(format);
-
- // Create and configure function
- FunctionType magnitude;
- magnitude.configure(&src1, &src2, &dst, magnitude_type);
-
- ARM_COMPUTE_EXPECT(src1.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(src2.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Allocate tensors
- src1.allocator()->allocate();
- src2.allocator()->allocate();
- dst.allocator()->allocate();
-
- ARM_COMPUTE_EXPECT(!src1.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!src2.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Fill tensors
- fill(AccessorType(src1), 0);
- fill(AccessorType(src2), 1);
-
- // Compute function
- magnitude.run();
-
- return dst;
- }
-
- SimpleTensor<T> compute_reference(const TensorShape &shape, Format format, MagnitudeType magnitude_type)
- {
- DataType data_type = data_type_from_format(format);
-
- // Create reference
- SimpleTensor<T> src1{ shape, data_type };
- SimpleTensor<T> src2{ shape, data_type };
-
- // Fill reference
- fill(src1, 0);
- fill(src2, 1);
-
- return reference::magnitude<T>(src1, src2, magnitude_type);
- }
-
- TensorType _target{};
- SimpleTensor<T> _reference{};
- MagnitudeType _magnitude_type{};
-};
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_MAGNITUDE_FIXTURE */
diff --git a/tests/validation/fixtures/MeanStdDevFixture.h b/tests/validation/fixtures/MeanStdDevFixture.h
deleted file mode 100644
index f3facc50de..0000000000
--- a/tests/validation/fixtures/MeanStdDevFixture.h
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- * Copyright (c) 2017-2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_TEST_MEAN_STD_DEV_FIXTURE
-#define ARM_COMPUTE_TEST_MEAN_STD_DEV_FIXTURE
-
-#include "tests/Globals.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Fixture.h"
-#include "tests/validation/reference/MeanStdDev.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class MeanStdDevValidationFixture : public framework::Fixture
-{
-public:
- template <typename...>
- void setup(TensorShape shape, DataType data_type)
- {
- _target = compute_target(shape, data_type);
- _reference = compute_reference(shape, data_type);
- }
-
-protected:
- template <typename U>
- void fill(U &&tensor)
- {
- if(tensor.data_type() == DataType::F32)
- {
- std::uniform_real_distribution<float> distribution(-1.0f, 1.0f);
- library->fill(tensor, distribution, 0);
- }
- else if(tensor.data_type() == DataType::F16)
- {
- arm_compute::utils::uniform_real_distribution_16bit<half> distribution{ -1.0f, 1.0f };
- library->fill(tensor, distribution, 0);
- }
- else
- {
- library->fill_tensor_uniform(tensor, 0);
- }
- }
-
- std::pair<float, float> compute_target(const TensorShape &shape, DataType data_type)
- {
- // Create tensors
- TensorType src = create_tensor<TensorType>(shape, data_type);
-
- // Create output variables
- float mean = 0.0f;
- float std_dev = 0.0f;
-
- // Create and configure function
- FunctionType mean_std_dev;
- mean_std_dev.configure(&src, &mean, &std_dev);
-
- ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Allocate tensors
- src.allocator()->allocate();
- ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Fill tensors
- fill(AccessorType(src));
-
- // Compute function
- mean_std_dev.run();
-
- return std::make_pair(mean, std_dev);
- }
-
- std::pair<float, float> compute_reference(const TensorShape &shape, DataType data_type)
- {
- // Create reference
- SimpleTensor<T> src{ shape, data_type };
-
- // Fill reference
- fill(src);
-
- // Compute reference
- return reference::mean_and_standard_deviation<T>(src);
- }
-
- std::pair<float, float> _target{};
- std::pair<float, float> _reference{};
-};
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_MEAN_STD_DEV_FIXTURE */
diff --git a/tests/validation/fixtures/Median3x3Fixture.h b/tests/validation/fixtures/Median3x3Fixture.h
deleted file mode 100644
index 2b978005b2..0000000000
--- a/tests/validation/fixtures/Median3x3Fixture.h
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Copyright (c) 2017 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_TEST_MEDIAN3X3_FIXTURE
-#define ARM_COMPUTE_TEST_MEDIAN3X3_FIXTURE
-
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Types.h"
-#include "tests/AssetsLibrary.h"
-#include "tests/Globals.h"
-#include "tests/IAccessor.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Fixture.h"
-#include "tests/validation/reference/Median3x3.h"
-
-#include <random>
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class Median3x3ValidationFixture : public framework::Fixture
-{
-public:
- template <typename...>
- void setup(TensorShape shape, DataType data_type, BorderMode border_mode)
- {
- std::mt19937 gen(library->seed());
- std::uniform_int_distribution<uint8_t> distribution(0, 255);
- const uint8_t constant_border_value = distribution(gen);
-
- _border_mode = border_mode;
- _target = compute_target(shape, data_type, border_mode, constant_border_value);
- _reference = compute_reference(shape, data_type, border_mode, constant_border_value);
- }
-
-protected:
- template <typename U>
- void fill(U &&tensor)
- {
- library->fill_tensor_uniform(tensor, 0);
- }
-
- TensorType compute_target(const TensorShape &shape, DataType data_type, BorderMode border_mode, uint8_t constant_border_value)
- {
- // Create tensors
- TensorType src = create_tensor<TensorType>(shape, data_type);
- TensorType dst = create_tensor<TensorType>(shape, data_type);
-
- // Create and configure function
- FunctionType median3x3;
- median3x3.configure(&src, &dst, border_mode, constant_border_value);
-
- ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Allocate tensors
- src.allocator()->allocate();
- dst.allocator()->allocate();
-
- ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Fill tensors
- fill(AccessorType(src));
-
- // Compute function
- median3x3.run();
-
- return dst;
- }
-
- SimpleTensor<T> compute_reference(const TensorShape &shape, DataType data_type, BorderMode border_mode, uint8_t constant_border_value)
- {
- ARM_COMPUTE_ERROR_ON(data_type != DataType::U8);
-
- // Create reference
- SimpleTensor<T> src{ shape, data_type };
-
- // Fill reference
- fill(src);
-
- // Compute reference
- return reference::median3x3<T>(src, border_mode, constant_border_value);
- }
-
- BorderMode _border_mode{};
- TensorType _target{};
- SimpleTensor<T> _reference{};
-};
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_MEDIAN3X3_FIXTURE */
diff --git a/tests/validation/fixtures/MinMaxLocationFixture.h b/tests/validation/fixtures/MinMaxLocationFixture.h
deleted file mode 100644
index 73466cce54..0000000000
--- a/tests/validation/fixtures/MinMaxLocationFixture.h
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Copyright (c) 2017 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_TEST_MIN_MAX_LOCATION_FIXTURE
-#define ARM_COMPUTE_TEST_MIN_MAX_LOCATION_FIXTURE
-
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Types.h"
-#include "tests/AssetsLibrary.h"
-#include "tests/Globals.h"
-#include "tests/Types.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Fixture.h"
-#include "tests/validation/reference/MinMaxLocation.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-template <typename TensorType, typename AccessorType, typename ArrayType, typename ArrayAccessorType, typename FunctionType, typename T>
-class MinMaxLocationValidationFixture : public framework::Fixture
-{
-public:
- using target_type = typename std::conditional<std::is_integral<T>::value, int32_t, float>::type;
-
- template <typename...>
- void setup(TensorShape shape, DataType data_type)
- {
- _target = compute_target(shape, data_type);
- _reference = compute_reference(shape, data_type);
- }
-
-protected:
- template <typename U>
- void fill(U &&tensor)
- {
- library->fill_tensor_uniform(tensor, 0);
- }
-
- MinMaxLocationValues<target_type> compute_target(const TensorShape &shape, DataType data_type)
- {
- MinMaxLocationValues<target_type> target;
-
- ArrayType min_loc(shape.total_size());
- ArrayType max_loc(shape.total_size());
-
- // Create tensors
- TensorType src = create_tensor<TensorType>(shape, data_type);
-
- // Create and configure function
- FunctionType min_max_loc;
- min_max_loc.configure(&src, &target.min, &target.max, &min_loc, &max_loc);
-
- // Allocate tensors
- src.allocator()->allocate();
- ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Fill tensors
- fill(AccessorType(src));
-
- // Compute function
- min_max_loc.run();
-
- // Create accessor objects for mapping operations
- ArrayAccessorType min_loc_accessor(min_loc);
- ArrayAccessorType max_loc_accessor(max_loc);
-
- // Move min Coordinates2D values from ArrayType to vector
- for(size_t i = 0; i < min_loc.num_values(); ++i)
- {
- target.min_loc.push_back(std::move(min_loc_accessor.at(i)));
- }
-
- // Move max Coordinates2D values from ArrayType to vector
- for(size_t i = 0; i < max_loc.num_values(); ++i)
- {
- target.max_loc.push_back(std::move(max_loc_accessor.at(i)));
- }
-
- return target;
- }
-
- MinMaxLocationValues<T> compute_reference(const TensorShape &shape, DataType data_type)
- {
- // Create reference
- SimpleTensor<T> src{ shape, data_type };
-
- // Fill reference
- fill(src);
-
- return reference::min_max_location<T>(src);
- }
-
- MinMaxLocationValues<target_type> _target{};
- MinMaxLocationValues<T> _reference{};
-};
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_MIN_MAX_LOCATION_FIXTURE */
diff --git a/tests/validation/fixtures/NonLinearFilterFixture.h b/tests/validation/fixtures/NonLinearFilterFixture.h
deleted file mode 100644
index 03d2bcd962..0000000000
--- a/tests/validation/fixtures/NonLinearFilterFixture.h
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
- * Copyright (c) 2017 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_TEST_NONLINEAR_FILTER_FIXTURE
-#define ARM_COMPUTE_TEST_NONLINEAR_FILTER_FIXTURE
-
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Types.h"
-#include "tests/AssetsLibrary.h"
-#include "tests/Globals.h"
-#include "tests/IAccessor.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Fixture.h"
-#include "tests/validation/Helpers.h"
-#include "tests/validation/reference/NonLinearFilter.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class NonLinearFilterValidationFixture : public framework::Fixture
-{
-public:
- template <typename...>
- void setup(TensorShape shape, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, BorderMode border_mode, DataType data_type)
- {
- std::mt19937 generator(library->seed());
- std::uniform_int_distribution<uint8_t> distribution_u8(0, 255);
- const uint8_t constant_border_value = distribution_u8(generator);
-
- // Create the mask
- std::vector<uint8_t> mask(mask_size * mask_size);
- fill_mask_from_pattern(mask.data(), mask_size, mask_size, pattern);
-
- _border_size = BorderSize(static_cast<int>(mask_size / 2));
- _target = compute_target(shape, data_type, function, mask_size, pattern, mask.data(), border_mode, constant_border_value);
- _reference = compute_reference(shape, data_type, function, mask_size, pattern, mask.data(), border_mode, constant_border_value);
- }
-
-protected:
- template <typename U>
- void fill(U &&tensor)
- {
- library->fill_tensor_uniform(tensor, 0);
- }
-
- TensorType compute_target(const TensorShape &shape, DataType data_type, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, BorderMode border_mode,
- uint8_t constant_border_value)
- {
- // Create tensors
- TensorType src = create_tensor<TensorType>(shape, data_type);
- TensorType dst = create_tensor<TensorType>(shape, data_type);
-
- // Create and configure function
- FunctionType non_linear_filter;
- non_linear_filter.configure(&src, &dst, function, mask_size, pattern, mask, border_mode, constant_border_value);
-
- ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Allocate tensors
- src.allocator()->allocate();
- dst.allocator()->allocate();
- ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Fill tensors
- fill(AccessorType(src));
-
- // Compute function
- non_linear_filter.run();
-
- return dst;
- }
-
- SimpleTensor<T> compute_reference(const TensorShape &shape, DataType data_type, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask,
- BorderMode border_mode, uint8_t constant_border_value)
- {
- // Create reference
- SimpleTensor<T> src{ shape, data_type };
-
- // Fill reference
- fill(src);
-
- return reference::non_linear_filter<T>(src, function, mask_size, pattern, mask, border_mode, constant_border_value);
- }
-
- BorderMode _border_mode{};
- BorderSize _border_size{};
- TensorType _target{};
- SimpleTensor<T> _reference{};
-};
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_NONLINEAR_FILTER_FIXTURE */
diff --git a/tests/validation/fixtures/OpticalFlowFixture.h b/tests/validation/fixtures/OpticalFlowFixture.h
deleted file mode 100644
index 5c3285a601..0000000000
--- a/tests/validation/fixtures/OpticalFlowFixture.h
+++ /dev/null
@@ -1,186 +0,0 @@
-/*
- * Copyright (c) 2018 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_TEST_OPTICAL_FLOW
-#define ARM_COMPUTE_TEST_OPTICAL_FLOW
-
-#include "arm_compute/core/PyramidInfo.h"
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Types.h"
-#include "tests/AssetsLibrary.h"
-#include "tests/Globals.h"
-#include "tests/IAccessor.h"
-#include "tests/Types.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Fixture.h"
-#include "tests/validation/reference/OpticalFlow.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-template <typename TensorType,
- typename AccessorType,
- typename ArrayType,
- typename ArrayAccessorType,
- typename FunctionType,
- typename PyramidType,
- typename PyramidFunctionType,
- typename T>
-
-class OpticalFlowValidationFixture : public framework::Fixture
-{
-public:
- template <typename...>
- void setup(std::string old_image_name, std::string new_image_name, OpticalFlowParameters params,
- size_t num_levels, size_t num_keypoints, Format format, BorderMode border_mode)
- {
- std::mt19937 gen(library->seed());
- std::uniform_int_distribution<uint8_t> int_dist(0, 255);
- const uint8_t constant_border_value = int_dist(gen);
-
- // Create keypoints
- std::vector<KeyPoint> old_keypoints = generate_random_keypoints(library->get_image_shape(old_image_name), num_keypoints, library->seed(), num_levels);
- std::vector<KeyPoint> new_keypoints_estimates = old_keypoints;
-
- _target = compute_target(old_image_name, new_image_name, params, num_levels, old_keypoints, new_keypoints_estimates, format, border_mode, constant_border_value);
- _reference = compute_reference(old_image_name, new_image_name, params, num_levels, old_keypoints, new_keypoints_estimates, format, border_mode, constant_border_value);
- }
-
-protected:
- template <typename V>
- void fill(V &&tensor, const std::string image, Format format)
- {
- library->fill(tensor, image, format);
- }
-
- ArrayType compute_target(std::string old_image_name, std::string new_image_name, OpticalFlowParameters params, size_t num_levels,
- std::vector<KeyPoint> &old_keypoints, std::vector<KeyPoint> &new_keypoints_estimates,
- Format format, BorderMode border_mode, uint8_t constant_border_value)
- {
- // Get image shapes
- TensorShape old_shape = library->get_image_shape(old_image_name);
- TensorShape new_shape = library->get_image_shape(new_image_name);
-
- // Create tensors
- auto old_image = create_tensor<TensorType>(old_shape, format);
- auto new_image = create_tensor<TensorType>(new_shape, format);
-
- // Load keypoints
- ArrayType old_points(old_keypoints.size());
- ArrayType new_points_estimates(new_keypoints_estimates.size());
- ArrayType new_points(old_keypoints.size());
-
- fill_array(ArrayAccessorType(old_points), old_keypoints);
- fill_array(ArrayAccessorType(new_points_estimates), new_keypoints_estimates);
-
- // Create pyramid images
- PyramidInfo pyramid_info(num_levels, SCALE_PYRAMID_HALF, old_image.info()->tensor_shape(), format);
- PyramidType old_pyramid = create_pyramid<PyramidType>(pyramid_info);
- PyramidType new_pyramid = create_pyramid<PyramidType>(pyramid_info);
-
- // Create and configure pyramid functions
- PyramidFunctionType old_gp;
- old_gp.configure(&old_image, &old_pyramid, border_mode, constant_border_value);
-
- PyramidFunctionType new_gp;
- new_gp.configure(&new_image, &new_pyramid, border_mode, constant_border_value);
-
- for(size_t i = 0; i < pyramid_info.num_levels(); ++i)
- {
- ARM_COMPUTE_EXPECT(old_pyramid.get_pyramid_level(i)->info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(new_pyramid.get_pyramid_level(i)->info()->is_resizable(), framework::LogLevel::ERRORS);
- }
-
- // Create and configure optical flow function
- FunctionType optical_flow;
-
- optical_flow.configure(&old_pyramid,
- &new_pyramid,
- &old_points,
- &new_points_estimates,
- &new_points,
- params.termination,
- params.epsilon,
- params.num_iterations,
- params.window_dimension,
- params.use_initial_estimate,
- border_mode,
- constant_border_value);
-
- ARM_COMPUTE_EXPECT(old_image.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(new_image.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Allocate input tensors
- old_image.allocator()->allocate();
- new_image.allocator()->allocate();
-
- // Allocate pyramids
- old_pyramid.allocate();
- new_pyramid.allocate();
-
- ARM_COMPUTE_EXPECT(!old_image.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!new_image.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- for(size_t i = 0; i < pyramid_info.num_levels(); ++i)
- {
- ARM_COMPUTE_EXPECT(!old_pyramid.get_pyramid_level(i)->info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!new_pyramid.get_pyramid_level(i)->info()->is_resizable(), framework::LogLevel::ERRORS);
- }
-
- // Fill tensors
- fill(AccessorType(old_image), old_image_name, format);
- fill(AccessorType(new_image), new_image_name, format);
-
- // Compute functions
- old_gp.run();
- new_gp.run();
- optical_flow.run();
-
- return new_points;
- }
-
- std::vector<KeyPoint> compute_reference(std::string old_image_name, std::string new_image_name,
- OpticalFlowParameters params, size_t num_levels,
- std::vector<KeyPoint> &old_keypoints, std::vector<KeyPoint> &new_keypoints_estimates,
- Format format, BorderMode border_mode, uint8_t constant_border_value)
- {
- SimpleTensor<T> old_image{ library->get_image_shape(old_image_name), data_type_from_format(format) };
- SimpleTensor<T> new_image{ library->get_image_shape(new_image_name), data_type_from_format(format) };
-
- fill(old_image, old_image_name, format);
- fill(new_image, new_image_name, format);
-
- return reference::optical_flow<T>(old_image, new_image, params, num_levels, old_keypoints, new_keypoints_estimates,
- border_mode, constant_border_value);
- }
-
- ArrayType _target{};
- std::vector<KeyPoint> _reference{};
-};
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_OPTICAL_FLOW */
diff --git a/tests/validation/fixtures/PhaseFixture.h b/tests/validation/fixtures/PhaseFixture.h
deleted file mode 100644
index b80d1ae00e..0000000000
--- a/tests/validation/fixtures/PhaseFixture.h
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Copyright (c) 2017 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_TEST_PHASE_FIXTURE
-#define ARM_COMPUTE_TEST_PHASE_FIXTURE
-
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Types.h"
-#include "tests/Globals.h"
-#include "tests/IAccessor.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Fixture.h"
-#include "tests/validation/reference/Phase.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class PhaseValidationFixture : public framework::Fixture
-{
-public:
- template <typename...>
- void setup(TensorShape shape, Format format, PhaseType phase_type)
- {
- _target = compute_target(shape, format, phase_type);
- _reference = compute_reference(shape, format, phase_type);
- }
-
-protected:
- template <typename U>
- void fill(U &&tensor, std::random_device::result_type seed_offset)
- {
- library->fill_tensor_uniform(tensor, seed_offset);
- }
-
- TensorType compute_target(const TensorShape &shape, Format format, PhaseType phase_type)
- {
- DataType data_type = data_type_from_format(format);
-
- // Create tensors
- TensorType src1 = create_tensor<TensorType>(shape, data_type);
- src1.info()->set_format(format);
-
- TensorType src2 = create_tensor<TensorType>(shape, data_type);
- src2.info()->set_format(format);
-
- TensorType dst = create_tensor<TensorType>(shape, DataType::U8);
- dst.info()->set_format(Format::U8);
-
- // Create and configure function
- FunctionType phase;
-
- phase.configure(&src1, &src2, &dst, phase_type);
-
- ARM_COMPUTE_EXPECT(src1.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(src2.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Allocate tensors
- src1.allocator()->allocate();
- src2.allocator()->allocate();
- dst.allocator()->allocate();
-
- ARM_COMPUTE_EXPECT(!src1.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!src2.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Fill tensors
- fill(AccessorType(src1), 0);
- fill(AccessorType(src2), 1);
-
- // Compute function
- phase.run();
-
- return dst;
- }
-
- SimpleTensor<uint8_t> compute_reference(const TensorShape &shape, Format format, PhaseType phase_type)
- {
- DataType data_type = data_type_from_format(format);
-
- // Create reference
- SimpleTensor<T> src1{ shape, data_type };
- SimpleTensor<T> src2{ shape, data_type };
-
- // Fill reference
- fill(src1, 0);
- fill(src2, 1);
-
- return reference::phase<T>(src1, src2, phase_type);
- }
-
- TensorType _target{};
- SimpleTensor<uint8_t> _reference{};
-};
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_PHASE_FIXTURE */
diff --git a/tests/validation/fixtures/SobelFixture.h b/tests/validation/fixtures/SobelFixture.h
deleted file mode 100644
index 61a6a80d70..0000000000
--- a/tests/validation/fixtures/SobelFixture.h
+++ /dev/null
@@ -1,192 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_TEST_SOBEL_FIXTURE
-#define ARM_COMPUTE_TEST_SOBEL_FIXTURE
-
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Types.h"
-#include "tests/AssetsLibrary.h"
-#include "tests/Globals.h"
-#include "tests/IAccessor.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Fixture.h"
-#include "tests/validation/reference/Sobel.h"
-
-#include <memory>
-
-namespace arm_compute
-{
-class CLSobel3x3;
-class CLSobel5x5;
-class CLSobel7x7;
-class NESobel3x3;
-class NESobel5x5;
-class NESobel7x7;
-
-namespace test
-{
-namespace validation
-{
-namespace
-{
-template <typename Function>
-struct info;
-
-template <>
-struct info<NESobel3x3>
-{
- static const Format dst_format = Format::S16;
- static const int filter_size = 3;
-};
-
-template <>
-struct info<CLSobel3x3>
-{
- static const Format dst_format = Format::S16;
- static const int filter_size = 3;
-};
-
-template <>
-struct info<NESobel5x5>
-{
- static const Format dst_format = Format::S16;
- static const int filter_size = 5;
-};
-
-template <>
-struct info<CLSobel5x5>
-{
- static const Format dst_format = Format::S16;
- static const int filter_size = 5;
-};
-
-template <>
-struct info<NESobel7x7>
-{
- static const Format dst_format = Format::S32;
- static const int filter_size = 7;
-};
-
-template <>
-struct info<CLSobel7x7>
-{
- static const Format dst_format = Format::S32;
- static const int filter_size = 7;
-};
-} // namespace
-
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T, typename U>
-class SobelValidationFixture : public framework::Fixture
-{
-public:
- template <typename...>
- void setup(TensorShape shape, BorderMode border_mode, Format format, GradientDimension gradient_dimension)
- {
- // Generate a random constant value
- std::mt19937 gen(library->seed());
- std::uniform_int_distribution<uint8_t> int_dist(0, 255);
- const uint8_t constant_border_value = int_dist(gen);
-
- _border_mode = border_mode;
- _target = compute_target(shape, border_mode, format, constant_border_value, gradient_dimension);
- _reference = compute_reference(shape, info<FunctionType>::filter_size, border_mode, format, constant_border_value, gradient_dimension);
- }
-
-protected:
- template <typename V>
- void fill(V &&tensor)
- {
- library->fill_tensor_uniform(tensor, 0);
- }
-
- std::pair<TensorType, TensorType> compute_target(const TensorShape &shape, BorderMode border_mode, Format format, uint8_t constant_border_value, GradientDimension gradient_dimension)
- {
- // Create tensors
- TensorType src = create_tensor<TensorType>(shape, data_type_from_format(format));
- TensorType dst_x = create_tensor<TensorType>(shape, data_type_from_format(info<FunctionType>::dst_format));
- TensorType dst_y = create_tensor<TensorType>(shape, data_type_from_format(info<FunctionType>::dst_format));
-
- src.info()->set_format(format);
- dst_x.info()->set_format(info<FunctionType>::dst_format);
- dst_y.info()->set_format(info<FunctionType>::dst_format);
-
- FunctionType sobel;
-
- switch(gradient_dimension)
- {
- case GradientDimension::GRAD_X:
- sobel.configure(&src, &dst_x, nullptr, border_mode, constant_border_value);
- break;
- case GradientDimension::GRAD_Y:
- sobel.configure(&src, nullptr, &dst_y, border_mode, constant_border_value);
- break;
- case GradientDimension::GRAD_XY:
- sobel.configure(&src, &dst_x, &dst_y, border_mode, constant_border_value);
- break;
- default:
- ARM_COMPUTE_ERROR("Gradient dimension not supported");
- }
-
- ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(dst_x.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(dst_y.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Allocate tensors
- src.allocator()->allocate();
- dst_x.allocator()->allocate();
- dst_y.allocator()->allocate();
-
- ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!dst_x.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!dst_y.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Fill tensors
- fill(AccessorType(src));
-
- // Compute function
- sobel.run();
-
- return std::make_pair(std::move(dst_x), std::move(dst_y));
- }
-
- std::pair<SimpleTensor<U>, SimpleTensor<U>> compute_reference(const TensorShape &shape, int filter_size, BorderMode border_mode, Format format, uint8_t constant_border_value,
- GradientDimension gradient_dimension)
- {
- // Create reference
- SimpleTensor<T> src{ shape, format };
-
- // Fill reference
- fill(src);
-
- return reference::sobel<U>(src, filter_size, border_mode, constant_border_value, gradient_dimension);
- }
-
- BorderMode _border_mode{ BorderMode::UNDEFINED };
- std::pair<TensorType, TensorType> _target{};
- std::pair<SimpleTensor<U>, SimpleTensor<U>> _reference{};
-};
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_SOBEL_FIXTURE */
diff --git a/tests/validation/fixtures/TableLookupFixture.h b/tests/validation/fixtures/TableLookupFixture.h
deleted file mode 100644
index a50c9fb2fe..0000000000
--- a/tests/validation/fixtures/TableLookupFixture.h
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Copyright (c) 2017 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_TEST_TABLE_LOOKUP_FIXTURE
-#define ARM_COMPUTE_TEST_TABLE_LOOKUP_FIXTURE
-
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Types.h"
-#include "tests/AssetsLibrary.h"
-#include "tests/Globals.h"
-#include "tests/IAccessor.h"
-#include "tests/RawLutAccessor.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Fixture.h"
-#include "tests/validation/Helpers.h"
-#include "tests/validation/reference/TableLookup.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-template <typename TensorType, typename AccessorType, typename FunctionType, typename LutAccessorType, typename LutType, typename T>
-class TableLookupValidationFixture : public framework::Fixture
-{
-public:
- template <typename...>
- void setup(TensorShape shape, DataType data_type)
- {
- _target = compute_target(shape, data_type);
- _reference = compute_reference(shape, data_type);
- }
-
-protected:
- template <typename U>
- void fill(U &&tensor, int i)
- {
- library->fill_tensor_uniform(tensor, i);
- }
-
- TensorType compute_target(const TensorShape &shape, DataType data_type)
- {
- // Create Lut
- const int num_elem = (data_type == DataType::U8) ? std::numeric_limits<uint8_t>::max() + 1 : std::numeric_limits<int16_t>::max() - std::numeric_limits<int16_t>::lowest() + 1;
- LutType lut(num_elem, data_type);
-
- //Fill the Lut
- fill_lookuptable(LutAccessorType(lut));
-
- // Create tensors
- TensorType src = create_tensor<TensorType>(shape, data_type);
- TensorType dst = create_tensor<TensorType>(shape, data_type);
-
- // Create and configure function
- FunctionType table_lookup;
- table_lookup.configure(&src, &lut, &dst);
-
- ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Allocate tensors
- src.allocator()->allocate();
- dst.allocator()->allocate();
-
- ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Fill tensors
- fill(AccessorType(src), 0);
- fill(AccessorType(dst), 1);
-
- // Compute function
- table_lookup.run();
-
- return dst;
- }
-
- SimpleTensor<T> compute_reference(const TensorShape &shape, DataType data_type)
- {
- // Create rawLut
- std::map<T, T> rawlut;
-
- // Fill the Lut
- fill_lookuptable(RawLutAccessor<T>(rawlut));
-
- // Create reference
- SimpleTensor<T> src{ shape, data_type };
-
- // Fill reference
- fill(src, 0);
-
- return reference::table_lookup(src, rawlut);
- }
-
- TensorType _target{};
- SimpleTensor<T> _reference{};
-};
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_TABLE_LOOKUP_FIXTURE */
diff --git a/tests/validation/fixtures/ThresholdFixture.h b/tests/validation/fixtures/ThresholdFixture.h
deleted file mode 100644
index 038c296178..0000000000
--- a/tests/validation/fixtures/ThresholdFixture.h
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_TEST_THRESHOLD_FIXTURE
-#define ARM_COMPUTE_TEST_THRESHOLD_FIXTURE
-
-#include "tests/AssetsLibrary.h"
-#include "tests/Globals.h"
-#include "tests/IAccessor.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Fixture.h"
-#include "tests/validation/reference/Threshold.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class ThresholdValidationFixture : public framework::Fixture
-{
-public:
- template <typename...>
- void setup(TensorShape shape, uint8_t threshold, uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper, DataType data_type)
- {
- _target = compute_target(shape, data_type, threshold, false_value, true_value, type, upper);
- _reference = compute_reference(shape, data_type, threshold, false_value, true_value, type, upper);
- }
-
-protected:
- template <typename U>
- void fill(U &&tensor)
- {
- library->fill_tensor_uniform(tensor, 0);
- }
-
- TensorType compute_target(const TensorShape &shape, DataType data_type,
- uint8_t threshold, uint8_t false_value, uint8_t true_value,
- ThresholdType type, uint8_t upper)
- {
- // Create tensors
- TensorType src = create_tensor<TensorType>(shape, data_type);
- TensorType dst = create_tensor<TensorType>(shape, data_type);
-
- // Create and configure function
- FunctionType thrsh;
- thrsh.configure(&src, &dst, ThresholdKernelInfo(threshold, false_value, true_value, type, upper));
-
- ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Allocate tensors
- src.allocator()->allocate();
- dst.allocator()->allocate();
- ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Fill tensors
- fill(AccessorType(src));
-
- // Compute function
- thrsh.run();
-
- return dst;
- }
-
- SimpleTensor<T> compute_reference(const TensorShape &shape, DataType data_type,
- uint8_t threshold, uint8_t false_value, uint8_t true_value,
- ThresholdType type, uint8_t upper)
- {
- // Create reference
- SimpleTensor<T> src{ shape, data_type };
-
- // Fill reference
- fill(src);
-
- return reference::threshold<T>(src, threshold, false_value, true_value, type, upper);
- }
-
- TensorType _target{};
- SimpleTensor<T> _reference{};
-};
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_THRESHOLD_FIXTURE */
diff --git a/tests/validation/fixtures/WarpAffineFixture.h b/tests/validation/fixtures/WarpAffineFixture.h
deleted file mode 100644
index 014d6628b1..0000000000
--- a/tests/validation/fixtures/WarpAffineFixture.h
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright (c) 2017-2018 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_TEST_WARP_AFFINE_FIXTURE
-#define ARM_COMPUTE_TEST_WARP_AFFINE_FIXTURE
-
-#include <memory>
-
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Types.h"
-#include "tests/AssetsLibrary.h"
-#include "tests/Globals.h"
-#include "tests/IAccessor.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Fixture.h"
-#include "tests/validation/reference/Utils.h"
-#include "tests/validation/reference/WarpAffine.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class WarpAffineValidationFixture : public framework::Fixture
-{
-public:
- template <typename...>
- void setup(TensorShape shape, DataType data_type, InterpolationPolicy policy, BorderMode border_mode)
- {
- // Generate a random constant value if border_mode is constant
- std::mt19937 gen(library->seed());
- std::uniform_int_distribution<uint8_t> distribution_u8(0, 255);
- uint8_t constant_border_value = distribution_u8(gen);
-
- // Create the matrix
- std::array<float, 9> matrix{ {} };
- fill_warp_matrix<9>(matrix);
-
- _target = compute_target(shape, data_type, matrix, policy, border_mode, constant_border_value);
- _reference = compute_reference(shape, data_type, matrix, policy, border_mode, constant_border_value);
- }
-
-protected:
- template <typename U>
- void fill(U &&tensor)
- {
- library->fill_tensor_uniform(tensor, 0);
- }
-
- TensorType compute_target(const TensorShape &shape, DataType data_type, const std::array<float, 9> &matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value)
- {
- // Create tensors
- TensorType src = create_tensor<TensorType>(shape, data_type);
- TensorType dst = create_tensor<TensorType>(shape, data_type);
-
- // Create and configure function
- FunctionType warp_affine;
- warp_affine.configure(&src, &dst, matrix, policy, border_mode, constant_border_value);
-
- ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Allocate tensors
- src.allocator()->allocate();
- dst.allocator()->allocate();
- ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Fill tensors
- fill(AccessorType(src));
-
- // Compute function
- warp_affine.run();
-
- return dst;
- }
-
- SimpleTensor<T> compute_reference(const TensorShape &shape, DataType data_type, const std::array<float, 9> &matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value)
- {
- // Create reference
- SimpleTensor<T> src{ shape, data_type };
-
- // Create the valid mask Tensor
- _valid_mask = SimpleTensor<T>(shape, data_type);
-
- // Fill reference
- fill(src);
-
- return reference::warp_affine<T>(src, _valid_mask, matrix.data(), policy, border_mode, constant_border_value);
- }
-
- TensorType _target{};
- SimpleTensor<T> _reference{};
- SimpleTensor<T> _valid_mask{};
-};
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_WARP_AFFINE_FIXTURE */
diff --git a/tests/validation/fixtures/WarpPerspectiveFixture.h b/tests/validation/fixtures/WarpPerspectiveFixture.h
deleted file mode 100644
index 40ae3b956c..0000000000
--- a/tests/validation/fixtures/WarpPerspectiveFixture.h
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * Copyright (c) 2017-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_TEST_WARP_PERSPECTIVE_FIXTURE
-#define ARM_COMPUTE_TEST_WARP_PERSPECTIVE_FIXTURE
-
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Types.h"
-#include "tests/AssetsLibrary.h"
-#include "tests/Globals.h"
-#include "tests/IAccessor.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Fixture.h"
-#include "tests/validation/reference/Utils.h"
-#include "tests/validation/reference/WarpPerspective.h"
-
-#include <random>
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class WarpPerspectiveValidationFixture : public framework::Fixture
-{
-public:
- template <typename...>
- void setup(TensorShape input_shape, DataType data_type, InterpolationPolicy policy, BorderMode border_mode)
- {
- uint8_t constant_border_value = 0;
- // Generate a random constant value if border_mode is constant
- if(border_mode == BorderMode::CONSTANT)
- {
- std::mt19937 gen(library->seed());
- std::uniform_int_distribution<uint8_t> distribution_u8(0, 255);
- constant_border_value = distribution_u8(gen);
- }
-
- // Create the matrix
- std::array<float, 9> matrix = { { 0 } };
- fill_warp_matrix<9>(matrix);
-
- _target = compute_target(input_shape, matrix, policy, border_mode, constant_border_value, data_type);
- _reference = compute_reference(input_shape, matrix, policy, border_mode, constant_border_value, data_type);
- }
-
-protected:
- template <typename U>
- void fill(U &&tensor)
- {
- library->fill_tensor_uniform(tensor, 0);
- }
-
- TensorType compute_target(const TensorShape &shape, const std::array<float, 9> &matrix, InterpolationPolicy policy, BorderMode border_mode,
- uint8_t constant_border_value,
- DataType data_type)
- {
- // Create tensors
- TensorType src = create_tensor<TensorType>(shape, data_type);
- TensorType dst = create_tensor<TensorType>(shape, data_type);
-
- // Create and configure function
- FunctionType warp_perspective;
- warp_perspective.configure(&src, &dst, matrix, policy, border_mode, constant_border_value);
-
- ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Allocate tensors
- src.allocator()->allocate();
- dst.allocator()->allocate();
-
- ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Fill tensors
- fill(AccessorType(src));
-
- // Compute function
- warp_perspective.run();
-
- return dst;
- }
-
- SimpleTensor<T> compute_reference(const TensorShape &shape, const std::array<float, 9> &matrix, InterpolationPolicy policy, BorderMode border_mode,
- uint8_t constant_border_value,
- DataType data_type)
- {
- ARM_COMPUTE_ERROR_ON(data_type != DataType::U8);
-
- // Create reference
- SimpleTensor<T> src{ shape, data_type };
-
- // Create the valid mask Tensor
- _valid_mask = SimpleTensor<T>(shape, data_type);
-
- // Fill reference
- fill(src);
-
- // Compute reference
- return reference::warp_perspective<T>(src, _valid_mask, matrix.data(), policy, border_mode, constant_border_value);
- }
-
- TensorType _target{};
- SimpleTensor<T> _reference{};
- BorderMode _border_mode{};
- SimpleTensor<T> _valid_mask{};
-};
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_WARP_PERSPECTIVE_FIXTURE */
diff --git a/tests/validation/reference/Convolution.cpp b/tests/validation/reference/Convolution.cpp
deleted file mode 100644
index 0a4e04392e..0000000000
--- a/tests/validation/reference/Convolution.cpp
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Helpers.h"
-
-#include "Convolution.h"
-#include "Utils.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace reference
-{
-template <typename T>
-SimpleTensor<T> convolution(const SimpleTensor<uint8_t> &src, DataType output_data_type, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value,
- const unsigned int width,
- const unsigned int height)
-{
- ARM_COMPUTE_ERROR_ON(scale == 0);
- ARM_COMPUTE_ERROR_ON(scale >= static_cast<unsigned int>(std::numeric_limits<int32_t>::max()));
-
- SimpleTensor<T> dst(src.shape(), output_data_type);
- SimpleTensor<int32_t> sum(src.shape(), output_data_type);
- const uint32_t num_elements = src.num_elements();
-#if defined(_OPENMP)
- #pragma omp parallel for
-#endif /* _OPENMP */
- for(uint32_t element_idx = 0; element_idx < num_elements; ++element_idx)
- {
- const Coordinates id = index2coord(src.shape(), element_idx);
- apply_2d_spatial_filter(id, src, sum, TensorShape(width, height), conv, 1, border_mode, constant_border_value);
- dst[element_idx] = saturate_cast<T>(tensor_elem_at<int32_t>(sum, id, border_mode, constant_border_value) / static_cast<int>(scale));
- }
-
- return dst;
-}
-
-template SimpleTensor<uint8_t> convolution(const SimpleTensor<uint8_t> &src, DataType output_data_type, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value,
- const unsigned int widht, const unsigned int height);
-template SimpleTensor<int16_t> convolution(const SimpleTensor<uint8_t> &src, DataType output_data_type, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value,
- const unsigned int widht, const unsigned int height);
-} // namespace reference
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/reference/Convolution.h b/tests/validation/reference/Convolution.h
deleted file mode 100644
index 174ce7e2ef..0000000000
--- a/tests/validation/reference/Convolution.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2017-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_TEST_CONVOLUTION_H
-#define ARM_COMPUTE_TEST_CONVOLUTION_H
-
-#include "tests/SimpleTensor.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace reference
-{
-template <typename T>
-SimpleTensor<T> convolution(const SimpleTensor<uint8_t> &src, DataType output_data_type, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value,
- const unsigned int width,
- const unsigned int height);
-} // namespace reference
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_CONVOLUTION_H */